contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

   1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file describes the PTX instructions in TableGen format.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 include "NVPTXInstrFormats.td"
  15
  16 // A NOP instruction: no outputs, no inputs, an empty assembly string and no
     // selection pattern.
  17 def NOP : NVPTXInst<(outs), (ins), "", []>;
  18
  19 // List of vector specific properties
     // Each def below is a distinct VecInstTypeEnum value: 1-7 for the specific
     // vector instruction kinds and 15 for isVecOther. VecInstTypeEnum is not
     // defined in this part of the file (presumably it comes from the included
     // NVPTXInstrFormats.td - confirm).
  20 def isVecLD      : VecInstTypeEnum<1>;
  21 def isVecST      : VecInstTypeEnum<2>;
  22 def isVecBuild   : VecInstTypeEnum<3>;
  23 def isVecShuffle : VecInstTypeEnum<4>;
  24 def isVecExtract : VecInstTypeEnum<5>;
  25 def isVecInsert  : VecInstTypeEnum<6>;
  26 def isVecDest    : VecInstTypeEnum<7>;
  27 def isVecOther   : VecInstTypeEnum<15>;
  28
  29 //===----------------------------------------------------------------------===//
  30 // NVPTX Operand Definitions.
  31 //===----------------------------------------------------------------------===//
  32
     // Operand of value type OtherVT. Judging by the name it carries a branch
     // target; the instructions that use it are not in this part of the file.
  33 def brtarget    : Operand<OtherVT>;
  34
  35 //===----------------------------------------------------------------------===//
  36 // NVPTX Instruction Predicate Definitions
  37 //===----------------------------------------------------------------------===//
  38
  39
     // Subtarget feature predicates. Each condition string is a C++ expression that
     // queries the `Subtarget` object; the query methods themselves are defined
     // outside this file.
  40 def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
  41 def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
  42 def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
     // True only when hasAtomRedGen32() is false but hasAtomRedG32() is true.
  43 def useAtomRedG32forGen32 :
  44   Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
  45 def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
  46 def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
  47 def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
  48 def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
     // 64-bit counterpart of useAtomRedG32forGen32.
  49 def useAtomRedG64forGen64 :
  50   Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
  51 def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
  52 def hasVote : Predicate<"Subtarget.hasVote()">;
  53 def hasDouble : Predicate<"Subtarget.hasDouble()">;
  54 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
  55 def hasLDG : Predicate<"Subtarget.hasLDG()">;
  56 def hasLDU : Predicate<"Subtarget.hasLDU()">;
  57 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
  58
  59 def doF32FTZ : Predicate<"UseF32FTZ">;
     // ^ Guards the ".ftz" f32 instruction variants defined in this file.
     // The condition strings in this group are C++ expressions over names
     // (UseF32FTZ, doFMAF32, allowFMA, do_DIVF32_PREC, ...) that are defined
     // outside this file.
  60
     // FMA/FMAD selection predicates. Each *_ftz form is the base condition
     // AND-ed with UseF32FTZ.
  61 def doFMAF32      : Predicate<"doFMAF32">;
  62 def doFMAF32_ftz  : Predicate<"(doFMAF32 && UseF32FTZ)">;
  63 def doFMAF32AGG      : Predicate<"doFMAF32AGG">;
  64 def doFMAF32AGG_ftz  : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
  65 def doFMAF64      : Predicate<"doFMAF64">;
  66 def doFMAF64AGG      : Predicate<"doFMAF64AGG">;
  67 def doFMADF32     : Predicate<"doFMADF32">;
  68 def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
  69
     // Guards the mul.wide selection patterns.
  70 def doMulWide      : Predicate<"doMulWide">;
  71
  72 def allowFMA : Predicate<"allowFMA">;
  73 def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
  74
     // Mutually exclusive: do_DIVF32_PREC == 0 selects APPROX, == 1 selects FULL.
  75 def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
  76 def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
  77
  78 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
  79
     // Always-true predicate (the condition is the constant 1).
  80 def true : Predicate<"1">;
  81
  82 //===----------------------------------------------------------------------===//
  83 // Special Handling for 8-bit Operands and Operations
  84 //
  85 // PTX supports 8-bit signed and unsigned types, but does not support 8-bit
  86 // operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
  87 // 8-bit registers.
  88 //
  89 // PTX ld, st and cvt instructions permit source and destination data operands
  90 // to be wider than the instruction-type size, so that narrow values may be
  91 // loaded, stored, and converted using regular-width registers.
  92 //
  93 // So in PTX generation, we
  94 // - always use 16-bit registers in place of 8-bit registers.
  95 //   (8-bit variables should stay as 8-bit as they represent memory layout.)
  96 // - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
  97 //   before operation
  98 //   . div
  99 //   . rem
 100 //   . neg (sign)
 101 //   . set, setp
 102 //   . shr
 103 //
 104 // We are patching the operations by inserting the cvt instructions in the
 105 // asm strings of the affected instructions.
 106 //
 107 // Since vector operations, except for ld/st, are eventually elementized, we
 108 // do not need to special-handle the vector 8-bit operations.
 109 //
 110 //
 111 //===----------------------------------------------------------------------===//
 112
 113 // Generate string block like
 114 // {
 115 //   .reg .s16 %temp1;
 116 //   .reg .s16 %temp2;
 117 //   cvt.s16.s8 %temp1, %a;
 118 //   cvt.s16.s8 %temp2, %b;
 119 //   opc.s16    %dst, %temp1, %temp2;
 120 // }
 121 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
     //
     // Register/register form: both $a and $b are converted with CVTStr.
     //   OpcStr  - opcode prefix; "16" is appended to it for the final operation.
     //   TypeStr - type used to declare the two scratch registers.
     //   CVTStr  - conversion instruction applied to each source operand.
     // The assembled text is exposed as the field `s`.
 122 class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
 123   string s = !strconcat("{{\n\t",
 124              !strconcat(".reg .", !strconcat(TypeStr,
 125              !strconcat(" \t%temp1;\n\t",
 126              !strconcat(".reg .", !strconcat(TypeStr,
 127              !strconcat(" \t%temp2;\n\t",
 128              !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
 129              !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
 130              !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
 131 }
 132
 133 // Generate string block like
 134 // {
 135 //   .reg .s16 %temp1;
 136 //   .reg .s16 %temp2;
 137 //   cvt.s16.s8 %temp1, %a;
 138 //   mov.b16    %temp2, %b;
 139 //   cvt.s16.s8 %temp2, %temp2;
 140 //   opc.s16    %dst, %temp1, %temp2;
 141 // }
 142 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
     //
     // Register/immediate form: $a is converted directly, while $b is first
     // copied into %temp2 with mov.b16 and then converted in place.
     //   OpcStr  - opcode prefix; "16" is appended to it for the final operation.
     //   TypeStr - type used to declare the two scratch registers.
     //   CVTStr  - conversion instruction applied to each source value.
     // The assembled text is exposed as the field `s`.
 143 class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
 144   string s = !strconcat("{{\n\t",
 145              !strconcat(".reg .", !strconcat(TypeStr,
 146              !strconcat(" \t%temp1;\n\t",
 147              !strconcat(".reg .",
 148              !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
 149              !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
 150              !strconcat("mov.b16 \t%temp2, $b;\n\t",
 151              !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
 152              !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
 153 }
 154
 155 // Generate string block like
 156 // {
 157 //   .reg .s16 %temp1;
 158 //   .reg .s16 %temp2;
 159 //   mov.b16    %temp1, %a;
 160 //   cvt.s16.s8 %temp1, %temp1;
 161 //   cvt.s16.s8 %temp2, %b;
 162 //   opc.s16    %dst, %temp1, %temp2;
 163 // }
 164 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
     //
     // Mirror image of Handle_i8ri: here $a is copied into %temp1 with mov.b16
     // and converted in place, while $b is converted directly. Presumably $a is
     // the immediate operand ("ir") - confirm at the use sites, which are not in
     // this part of the file.
     // The assembled text is exposed as the field `s`.
 165 class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
 166   string s = !strconcat("{{\n\t",
 167              !strconcat(".reg .", !strconcat(TypeStr,
 168              !strconcat(" \t%temp1;\n\t",
 169              !strconcat(".reg .", !strconcat(TypeStr,
 170              !strconcat(" \t%temp2;\n\t",
 171              !strconcat("mov.b16 \t%temp1, $a;\n\t",
 172              !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
 173              !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
 174              !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
 175 }
 176
 177
 178 //===----------------------------------------------------------------------===//
 179 // Some Common Instruction Class Templates
 180 //===----------------------------------------------------------------------===//
 181
 182 multiclass I3<string OpcStr, SDNode OpNode> {
       // Two-source integer instruction selected from OpNode, in register/register
       // (rr) and register/immediate (ri) forms for the 64-, 32-, 16- and 8-bit
       // register classes. OpcStr is an opcode prefix that gets the width appended
       // ("64", "32", "16"); the i8 forms also use the "16" suffix and operate on
       // the operands directly, without any widening conversion.
 183   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
 184                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 185                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
 186                        Int64Regs:$b))]>;
 187   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
 188                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 189                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
 190   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
 191                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 192                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
 193                        Int32Regs:$b))]>;
 194   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
 195                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 196                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
 197   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
 198                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 199                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
 200                        Int16Regs:$b))]>;
 201   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
 202                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 203                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
 204   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
 205                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 206                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
 207   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
 208                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 209                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
 210 }
 211
 212 multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
       // Same 64/32/16-bit rr and ri forms as I3, but the 8-bit forms take their
       // assembly text from Handle_i8rr / Handle_i8ri, which convert the 8-bit
       // operands with CVTStr into scratch registers of type TypeStr before
       // applying the 16-bit operation.
 213   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
 214                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 215                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
 216                        Int64Regs:$b))]>;
 217   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
 218                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 219                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
 220   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
 221                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 222                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
 223                        Int32Regs:$b))]>;
 224   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
 225                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 226                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
 227   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
 228                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 229                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
 230                        Int16Regs:$b))]>;
 231   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
 232                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 233                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
 234   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
 235                      Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
 236                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
 237   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
 238                      Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
 239                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
 240 }
 241
 242 multiclass I3_noi8<string OpcStr, SDNode OpNode> {
       // Same 64/32/16-bit rr and ri forms as I3, with no 8-bit forms at all.
       // Users that need 8-bit variants must define them separately.
 243   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
 244                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 245                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
 246                        Int64Regs:$b))]>;
 247   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
 248                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 249                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
 250   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
 251                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 252                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
 253                        Int32Regs:$b))]>;
 254   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
 255                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 256                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
 257   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
 258                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 259                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
 260                        Int16Regs:$b))]>;
 261   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
 262                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 263                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
 264 }
 265
 266 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
        // 32-bit-only rr and ri forms. Unlike I3, OpcStr here is the complete
        // opcode and the fixed ".s32" type suffix is appended to it.
 267    def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
 268        Int32Regs:$b),
 269                       !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
 270                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
 271                         Int32Regs:$b))]>;
 272    def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
 273                       !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
 274                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
 275 }
 276
 277 multiclass F3<string OpcStr, SDNode OpNode> {
        // Two-source floating-point instruction in rr and ri (fpimm) forms for
        // f64 and f32, with no rounding-mode modifier in the assembly text.
        // The f64 and plain f32 forms require allowFMA; the ".ftz" f32 forms
        // require allowFMA_ftz (allowFMA && UseF32FTZ).
 278    def f64rr : NVPTXInst<(outs Float64Regs:$dst),
 279                       (ins Float64Regs:$a, Float64Regs:$b),
 280                       !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
 281                       [(set Float64Regs:$dst,
 282                         (OpNode Float64Regs:$a, Float64Regs:$b))]>,
 283                       Requires<[allowFMA]>;
 284    def f64ri : NVPTXInst<(outs Float64Regs:$dst),
 285                       (ins Float64Regs:$a, f64imm:$b),
 286                       !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
 287                       [(set Float64Regs:$dst,
 288                         (OpNode Float64Regs:$a, fpimm:$b))]>,
 289                       Requires<[allowFMA]>;
 290    def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
 291                       (ins Float32Regs:$a, Float32Regs:$b),
 292                       !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
 293                       [(set Float32Regs:$dst,
 294                         (OpNode Float32Regs:$a, Float32Regs:$b))]>,
 295                       Requires<[allowFMA_ftz]>;
 296    def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
 297                       (ins Float32Regs:$a, f32imm:$b),
 298                       !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
 299                       [(set Float32Regs:$dst,
 300                         (OpNode Float32Regs:$a, fpimm:$b))]>,
 301                       Requires<[allowFMA_ftz]>;
 302    def f32rr : NVPTXInst<(outs Float32Regs:$dst),
 303                       (ins Float32Regs:$a, Float32Regs:$b),
 304                       !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
 305                       [(set Float32Regs:$dst,
 306                         (OpNode Float32Regs:$a, Float32Regs:$b))]>,
 307                       Requires<[allowFMA]>;
 308    def f32ri : NVPTXInst<(outs Float32Regs:$dst),
 309                       (ins Float32Regs:$a, f32imm:$b),
 310                       !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
 311                       [(set Float32Regs:$dst,
 312                         (OpNode Float32Regs:$a, fpimm:$b))]>,
 313                       Requires<[allowFMA]>;
 314 }
 315
 316 multiclass F3_rn<string OpcStr, SDNode OpNode> {
        // Same shape as F3, but every form emits the explicit ".rn" modifier.
        // Only the ".ftz" f32 forms are predicated (on doF32FTZ); the f64 and
        // plain f32 forms carry no Requires clause.
 317    def f64rr : NVPTXInst<(outs Float64Regs:$dst),
 318                       (ins Float64Regs:$a, Float64Regs:$b),
 319                       !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
 320                       [(set Float64Regs:$dst,
 321                         (OpNode Float64Regs:$a, Float64Regs:$b))]>;
 322    def f64ri : NVPTXInst<(outs Float64Regs:$dst),
 323                       (ins Float64Regs:$a, f64imm:$b),
 324                       !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
 325                       [(set Float64Regs:$dst,
 326                         (OpNode Float64Regs:$a, fpimm:$b))]>;
 327    def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
 328                       (ins Float32Regs:$a, Float32Regs:$b),
 329                       !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
 330                       [(set Float32Regs:$dst,
 331                         (OpNode Float32Regs:$a, Float32Regs:$b))]>,
 332                       Requires<[doF32FTZ]>;
 333    def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
 334                       (ins Float32Regs:$a, f32imm:$b),
 335                       !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
 336                       [(set Float32Regs:$dst,
 337                         (OpNode Float32Regs:$a, fpimm:$b))]>,
 338                       Requires<[doF32FTZ]>;
 339    def f32rr : NVPTXInst<(outs Float32Regs:$dst),
 340                       (ins Float32Regs:$a, Float32Regs:$b),
 341                       !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
 342                       [(set Float32Regs:$dst,
 343                         (OpNode Float32Regs:$a, Float32Regs:$b))]>;
 344    def f32ri : NVPTXInst<(outs Float32Regs:$dst),
 345                       (ins Float32Regs:$a, f32imm:$b),
 346                       !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
 347                       [(set Float32Regs:$dst,
 348                         (OpNode Float32Regs:$a, fpimm:$b))]>;
 349 }
 350
 351 multiclass F2<string OpcStr, SDNode OpNode> {
        // One-source floating-point instruction: f64, f32 and an ".ftz" f32 form
        // that is only available under doF32FTZ.
 352    def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
 353                       !strconcat(OpcStr, ".f64 \t$dst, $a;"),
 354                       [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
 355    def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
 356                       !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
 357                       [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
 358                       Requires<[doF32FTZ]>;
 359    def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
 360                       !strconcat(OpcStr, ".f32 \t$dst, $a;"),
 361                       [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
 362 }
 363
 364 //===----------------------------------------------------------------------===//
 365 // NVPTX Instructions.
 366 //===----------------------------------------------------------------------===//
 367
 368 //-----------------------------------
 369 // Integer Arithmetic
 370 //-----------------------------------
 371
 372 multiclass ADD_SUB_i1<SDNode OpNode> {
        // 1-bit add/sub on predicate registers. Both forms emit "xor.pred"
        // regardless of OpNode: modulo 2, addition and subtraction are both XOR.
 373    def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
 374           "xor.pred \t$dst, $a, $b;",
 375       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
 376    def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
 377           "xor.pred \t$dst, $a, $b;",
 378       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
 379 }
 380
 381 defm ADD_i1 : ADD_SUB_i1<add>;
 382 defm SUB_i1 : ADD_SUB_i1<sub>;
 383
 384
     // Plain integer add/sub for all widths provided by I3 (add.s<N> / sub.s<N>).
 385 defm ADD : I3<"add.s", add>;
 386 defm SUB : I3<"sub.s", sub>;
 387
     // 32-bit add/sub selected from the addc/subc nodes (add.cc.s32 / sub.cc.s32).
 388 defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
 389 defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
 390
     // 32-bit add/sub selected from the adde/sube nodes (addc.cc.s32 / subc.cc.s32).
 391 defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
 392 defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
 393
 394 // mul.wide PTX instruction: helper leaves, transforms, instructions, patterns.
     // SInt32Const matches an immediate whose value fits in 32 bits when
     // interpreted as a signed integer (APInt::isSignedIntN(32)).
 395 def SInt32Const : PatLeaf<(imm), [{
 396   const APInt &v = N->getAPIntValue();
 397   if (v.isSignedIntN(32))
 398     return true;
 399   return false;
 400 }]>;
 401
 402 def UInt32Const : PatLeaf<(imm), [{
       // Matches an immediate whose value fits in 32 bits when interpreted as
       // an unsigned integer (APInt::isIntN(32)).
 403   const APInt &v = N->getAPIntValue();
 404   if (v.isIntN(32))
 405     return true;
 406   return false;
 407 }]>;
 408
 409 def SInt16Const : PatLeaf<(imm), [{
       // Matches an immediate whose value fits in 16 bits when interpreted as
       // a signed integer (APInt::isSignedIntN(16)).
 410   const APInt &v = N->getAPIntValue();
 411   if (v.isSignedIntN(16))
 412     return true;
 413   return false;
 414 }]>;
 415
 416 def UInt16Const : PatLeaf<(imm), [{
       // Matches an immediate whose value fits in 16 bits when interpreted as
       // an unsigned integer (APInt::isIntN(16)).
 417   const APInt &v = N->getAPIntValue();
 418   if (v.isIntN(16))
 419     return true;
 420   return false;
 421 }]>;
 422
 423 def Int5Const : PatLeaf<(imm), [{
       // Matches a shift amount that may be rewritten as a 32-bit mul.wide
       // multiplier: the multiplier is built as (1 << v) in 32 bits by SHL2MUL32.
       // The upper bound is 31, not 32: (1 << 31) has the sign bit set, so
       // mul.wide.s32 would read it as -2^31 and produce the negated result.
       // NOTE(review): any user outside this part of the file that relied on
       // a shift of exactly 31 matching this leaf will now fall back to its
       // other patterns - confirm there is none that has no fallback.
 424   const APInt &v = N->getAPIntValue();
 425   // Check if 0 <= v < 31
 426   // Only then is (1 << v) a non-negative value that fits in i32
 427   if (v.sge(0) && v.slt(31))
 428     return true;
 429   return false;
 430 }]>;
 431
 432 def Int4Const : PatLeaf<(imm), [{
       // Matches a shift amount that may be rewritten as a 16-bit mul.wide
       // multiplier: the multiplier is built as (1 << v) in 16 bits by SHL2MUL16.
       // The upper bound is 15, not 16: (1 << 15) has the sign bit set, so
       // mul.wide.s16 would read it as -2^15 and produce the negated result.
       // NOTE(review): any user outside this part of the file that relied on
       // a shift of exactly 15 matching this leaf will now fall back to its
       // other patterns - confirm there is none that has no fallback.
 433   const APInt &v = N->getAPIntValue();
 434   // Check if 0 <= v < 15
 435   // Only then is (1 << v) a non-negative value that fits in i16
 436   if (v.sge(0) && v.slt(15))
 437     return true;
 438   return false;
 439 }]>;
 440
 441 def SHL2MUL32 : SDNodeXForm<imm, [{
       // Turns a shift amount v into the equivalent multiplier: a 32-bit APInt
       // holding 1, shifted left by v, returned as an i32 target constant.
 442   const APInt &v = N->getAPIntValue();
 443   APInt temp(32, 1);
 444   return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
 445 }]>;
 446
 447 def SHL2MUL16 : SDNodeXForm<imm, [{
       // Turns a shift amount v into the equivalent multiplier: a 16-bit APInt
       // holding 1, shifted left by v, returned as an i16 target constant.
 448   const APInt &v = N->getAPIntValue();
 449   APInt temp(16, 1);
 450   return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
 451 }]>;
 452
 453 def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
                                // mul.wide instruction definitions. None of them
                                // has a selection pattern of its own (the pattern
                                // list is empty); they are selected through the
                                // separate Pat<> records. Naming: S/U = signed or
                                // unsigned, 64/32 = result width, "Imm" = second
                                // source is an immediate.
                                // 32 x 32 -> 64, signed, register/register.
 454                            (ins Int32Regs:$a, Int32Regs:$b),
 455                            "mul.wide.s32 \t$dst, $a, $b;", []>;
     // 32 x 32 -> 64, signed; the immediate operand is declared as i64imm.
 456 def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
 457                             (ins Int32Regs:$a, i64imm:$b),
 458                            "mul.wide.s32 \t$dst, $a, $b;", []>;
 459
     // 32 x 32 -> 64, unsigned.
 460 def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
 461                            (ins Int32Regs:$a, Int32Regs:$b),
 462                            "mul.wide.u32 \t$dst, $a, $b;", []>;
 463 def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
 464                             (ins Int32Regs:$a, i64imm:$b),
 465                            "mul.wide.u32 \t$dst, $a, $b;", []>;
 466
     // 16 x 16 -> 32, signed; the immediate operand is declared as i32imm.
 467 def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
 468                             (ins Int16Regs:$a, Int16Regs:$b),
 469                            "mul.wide.s16 \t$dst, $a, $b;", []>;
 470 def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
 471                             (ins Int16Regs:$a, i32imm:$b),
 472                            "mul.wide.s16 \t$dst, $a, $b;", []>;
 473
     // 16 x 16 -> 32, unsigned.
 474 def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
 475                             (ins Int16Regs:$a, Int16Regs:$b),
 476                            "mul.wide.u16 \t$dst, $a, $b;", []>;
 477 def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
 478                             (ins Int16Regs:$a, i32imm:$b),
 479                            "mul.wide.u16 \t$dst, $a, $b;", []>;
 480
 481 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
               // Shift-left of an extended value by a constant is selected as
               // mul.wide by (1 << b): sext uses the signed form, zext the
               // unsigned form. Int5Const / Int4Const decide which shift amounts
               // qualify and SHL2MUL32 / SHL2MUL16 compute the multiplier.
               // All four patterns are enabled only under doMulWide.
 482           (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
 483           Requires<[doMulWide]>;
 484 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
 485           (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
 486           Requires<[doMulWide]>;
 487
     // 16-bit source, 32-bit result.
 488 def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
 489           (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
 490           Requires<[doMulWide]>;
 491 def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
 492           (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
 493           Requires<[doMulWide]>;
 494
 495 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
               // A multiply whose operands are both extended from the narrower
               // type (or one extended operand and a constant that fits the
               // narrower type) is selected as a single mul.wide. The sext forms
               // use the signed instruction and the S*Const leaves; the zext
               // forms use the unsigned instruction and the U*Const leaves.
               // All patterns are enabled only under doMulWide.
 496           (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
 497           Requires<[doMulWide]>;
 498 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
 499           (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
 500           Requires<[doMulWide]>;
 501
 502 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
 503           (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
 504 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
 505           (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
 506           Requires<[doMulWide]>;
 507
     // 16-bit sources, 32-bit result.
 508 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
 509           (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
 510 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
 511           (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
 512           Requires<[doMulWide]>;
 513
 514 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
 515           (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
 516 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
 517           (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
 518           Requires<[doMulWide]>;
 519
 520 defm MULT : I3<"mul.lo.s", mul>;
     // ^ Low half of the product for every width provided by I3.
 521
     // High half of the signed / unsigned product for 64-, 32- and 16-bit
     // operands only (I3_noi8 provides no 8-bit forms).
 522 defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
 523 defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
 524 def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
                 // Signed 8-bit multiply-high, register/register: convert both
                 // operands with cvt.s16.s8 into scratch registers, take the
                 // 16-bit low product, then shift it right by 8 with shr.s16 so
                 // the upper byte of the product ends up in $dst.
 525             !strconcat("{{ \n\t",
 526             !strconcat(".reg \t.s16 temp1; \n\t",
 527             !strconcat(".reg \t.s16 temp2; \n\t",
 528             !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
 529             !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
 530             !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
 531             !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
 532             !strconcat("}}", "")))))))),
 533       [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
 534 def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
                 // Signed 8-bit multiply-high, register/immediate: as the rr
                 // form, except that the immediate $b is first copied into temp2
                 // with mov.b16 and then converted in place with cvt.s16.s8.
 535             !strconcat("{{ \n\t",
 536             !strconcat(".reg \t.s16 temp1; \n\t",
 537             !strconcat(".reg \t.s16 temp2; \n\t",
 538             !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
 539             !strconcat("mov.b16 \ttemp2, $b; \n\t",
 540             !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
 541             !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
 542             !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
 543             !strconcat("}}", ""))))))))),
 544       [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
 545 def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
                 // Unsigned 8-bit multiply-high, register/register: convert both
                 // operands with cvt.u16.u8 into scratch registers, take the
                 // 16-bit low product, then shift it right by 8 with shr.u16 so
                 // the upper byte of the product ends up in $dst.
 546             !strconcat("{{ \n\t",
 547             !strconcat(".reg \t.u16 temp1; \n\t",
 548             !strconcat(".reg \t.u16 temp2; \n\t",
 549             !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
 550             !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
 551             !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
 552             !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
 553             !strconcat("}}", "")))))))),
 554       [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
 555 def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
                 // Unsigned 8-bit multiply-high, register/immediate: as the rr
                 // form, except that the immediate $b is first copied into temp2
                 // with mov.b16 and then converted in place with cvt.u16.u8.
 556             !strconcat("{{ \n\t",
 557             !strconcat(".reg \t.u16 temp1; \n\t",
 558             !strconcat(".reg \t.u16 temp2; \n\t",
 559             !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
 560             !strconcat("mov.b16 \ttemp2, $b; \n\t",
 561             !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
 562             !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
 563             !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
 564             !strconcat("}}", ""))))))))),
 565       [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
 566
 567
 568 defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
     // ^ Division and remainder use I3_i8, so their 8-bit forms convert the
     // operands to 16 bits first: cvt.s16.s8 for the signed instructions and
     // cvt.u16.u8 for the unsigned ones.
 569 defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
 570
 571 defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
 572 // The ri version of SREM will not be selected as DAGCombiner::visitSREM will
     // lower it.
 573 defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
 574 // The ri version of UREM will not be selected as DAGCombiner::visitUREM will
     // lower it.
 575
 576 def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
                           // 8-bit multiply-add: matches (add (mul a, b), c) and
                           // emits mad.lo.s16 directly on the Int8Regs operands.
                           // The three suffix letters give the kind of $a, $b, $c
                           // (r = register, i = immediate).
 577                       (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
 578                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 579                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
 580                         Int8Regs:$c))]>;
 581 def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
 582                       (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
 583                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 584                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
 585                         imm:$c))]>;
 586 def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
 587                       (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
 588                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 589                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
 590                         Int8Regs:$c))]>;
 591 def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
 592                       (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
 593                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 594                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
 595                         imm:$c))]>;
 596
 597 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
                           // 16-bit multiply-add: matches (add (mul a, b), c) and
                           // emits mad.lo.s16. The three suffix letters give the
                           // kind of $a, $b, $c (r = register, i = immediate).
 598                       (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
 599                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 600                       [(set Int16Regs:$dst, (add
 601                         (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
 602 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
 603                       (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
 604                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 605                       [(set Int16Regs:$dst, (add
 606                         (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
 607 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
 608                       (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
 609                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 610                       [(set Int16Regs:$dst, (add
 611                         (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
 612 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
 613     (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
 614                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 615                       [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
 616                         imm:$c))]>;
 617
 618 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
                           // 32-bit multiply-add: matches (add (mul a, b), c) and
                           // emits mad.lo.s32. The three suffix letters give the
                           // kind of $a, $b, $c (r = register, i = immediate).
 619                       (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
 620                       "mad.lo.s32 \t$dst, $a, $b, $c;",
 621                       [(set Int32Regs:$dst, (add
 622                         (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
 623 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
 624                       (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
 625                       "mad.lo.s32 \t$dst, $a, $b, $c;",
 626                       [(set Int32Regs:$dst, (add
 627                         (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
 628 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
 629                       (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
 630                       "mad.lo.s32 \t$dst, $a, $b, $c;",
 631                       [(set Int32Regs:$dst, (add
 632                         (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
 633 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
 634                       (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
 635                       "mad.lo.s32 \t$dst, $a, $b, $c;",
 636                       [(set Int32Regs:$dst, (add
 637                         (mul Int32Regs:$a, imm:$b), imm:$c))]>;
 638
 639 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
 640                       (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
 641                       "mad.lo.s64 \t$dst, $a, $b, $c;",
 642                       [(set Int64Regs:$dst, (add
 643                         (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
 644 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
 645                       (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
 646                       "mad.lo.s64 \t$dst, $a, $b, $c;",
 647                       [(set Int64Regs:$dst, (add
 648                         (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
 649 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
 650                       (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
 651                       "mad.lo.s64 \t$dst, $a, $b, $c;",
 652                       [(set Int64Regs:$dst, (add
 653                         (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
 654 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
 655                       (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
 656                       "mad.lo.s64 \t$dst, $a, $b, $c;",
 657                       [(set Int64Regs:$dst, (add
 658                         (mul Int64Regs:$a, imm:$b), imm:$c))]>;
 659
 660
 661 def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
 662                      !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
 663                                  "neg.s16 \t$dst, $dst;"),
 664          [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
 665 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
 666                      "neg.s16 \t$dst, $src;",
 667          [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
 668 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
 669                      "neg.s32 \t$dst, $src;",
 670          [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
 671 def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
 672                      "neg.s64 \t$dst, $src;",
 673          [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
 674
 675 //-----------------------------------
 676 // Floating Point Arithmetic
 677 //-----------------------------------
 678
 679 // Constant 1.0f
 680 def FloatConst1 : PatLeaf<(fpimm), [{
 681     if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
 682       return false;
 683     float f = (float)N->getValueAPF().convertToFloat();
 684     return (f==1.0f);
 685 }]>;
 686 // Constand (double)1.0
 687 def DoubleConst1 : PatLeaf<(fpimm), [{
 688     if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
 689       return false;
 690     double d = (double)N->getValueAPF().convertToDouble();
 691     return (d==1.0);
 692 }]>;
 693
// Floating-point add/sub/mul, instantiated from the F3 multiclass (defined
// outside this section) with the opcode string and the DAG node to match.
defm FADD : F3<"add", fadd>;
defm FSUB : F3<"sub", fsub>;
defm FMUL : F3<"mul", fmul>;

// The same three operations instantiated from F3_rn.
// NOTE(review): presumably the explicit round-to-nearest variants -- confirm
// against the F3_rn definition.
defm FADD_rn : F3_rn<"add", fadd>;
defm FSUB_rn : F3_rn<"sub", fsub>;
defm FMUL_rn : F3_rn<"mul", fmul>;

// Unary floating-point operations, instantiated from the F2 multiclass.
defm FABS : F2<"abs", fabs>;
defm FNEG : F2<"neg", fneg>;
defm FSQRT : F2<"sqrt.rn", fsqrt>;
 705
 706 //
 707 // F64 division
 708 //
 709 def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
 710                       (ins f64imm:$a, Float64Regs:$b),
 711                       "rcp.rn.f64 \t$dst, $b;",
 712                       [(set Float64Regs:$dst,
 713                         (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
 714 def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
 715                       (ins Float64Regs:$a, Float64Regs:$b),
 716                       "div.rn.f64 \t$dst, $a, $b;",
 717                       [(set Float64Regs:$dst,
 718                         (fdiv Float64Regs:$a, Float64Regs:$b))]>;
 719 def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
 720                       (ins Float64Regs:$a, f64imm:$b),
 721                       "div.rn.f64 \t$dst, $a, $b;",
 722                       [(set Float64Regs:$dst,
 723                         (fdiv Float64Regs:$a, fpimm:$b))]>;
 724
 725 //
 726 // F32 Approximate reciprocal
 727 //
 728 def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
 729                       (ins f32imm:$a, Float32Regs:$b),
 730                       "rcp.approx.ftz.f32 \t$dst, $b;",
 731                       [(set Float32Regs:$dst,
 732                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 733                       Requires<[do_DIVF32_APPROX, doF32FTZ]>;
 734 def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
 735                         (ins f32imm:$a, Float32Regs:$b),
 736                        "rcp.approx.f32 \t$dst, $b;",
 737                       [(set Float32Regs:$dst,
 738                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 739                       Requires<[do_DIVF32_APPROX]>;
 740 //
 741 // F32 Approximate division
 742 //
 743 def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
 744                       (ins Float32Regs:$a, Float32Regs:$b),
 745                       "div.approx.ftz.f32 \t$dst, $a, $b;",
 746                       [(set Float32Regs:$dst,
 747                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 748                       Requires<[do_DIVF32_APPROX, doF32FTZ]>;
 749 def FDIV32approxrr     : NVPTXInst<(outs Float32Regs:$dst),
 750                       (ins Float32Regs:$a, Float32Regs:$b),
 751                       "div.approx.f32 \t$dst, $a, $b;",
 752                       [(set Float32Regs:$dst,
 753                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 754                       Requires<[do_DIVF32_APPROX]>;
 755 //
 756 // F32 Semi-accurate reciprocal
 757 //
 758 // rcp.approx gives the same result as div.full(1.0f, a) and is faster.
 759 //
 760 def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
 761                       (ins f32imm:$a, Float32Regs:$b),
 762                       "rcp.approx.ftz.f32 \t$dst, $b;",
 763                       [(set Float32Regs:$dst,
 764                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 765                       Requires<[do_DIVF32_FULL, doF32FTZ]>;
 766 def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
 767                       (ins f32imm:$a, Float32Regs:$b),
 768                       "rcp.approx.f32 \t$dst, $b;",
 769                       [(set Float32Regs:$dst,
 770                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 771                       Requires<[do_DIVF32_FULL]>;
 772 //
 773 // F32 Semi-accurate division
 774 //
 775 def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
 776                       (ins Float32Regs:$a, Float32Regs:$b),
 777                       "div.full.ftz.f32 \t$dst, $a, $b;",
 778                       [(set Float32Regs:$dst,
 779                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 780                       Requires<[do_DIVF32_FULL, doF32FTZ]>;
 781 def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
 782                       (ins Float32Regs:$a, f32imm:$b),
 783                       "div.full.ftz.f32 \t$dst, $a, $b;",
 784                       [(set Float32Regs:$dst,
 785                         (fdiv Float32Regs:$a, fpimm:$b))]>,
 786                       Requires<[do_DIVF32_FULL, doF32FTZ]>;
 787 def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
 788                       (ins Float32Regs:$a, Float32Regs:$b),
 789                       "div.full.f32 \t$dst, $a, $b;",
 790                       [(set Float32Regs:$dst,
 791                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 792                       Requires<[do_DIVF32_FULL]>;
 793 def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
 794                       (ins Float32Regs:$a, f32imm:$b),
 795                       "div.full.f32 \t$dst, $a, $b;",
 796                       [(set Float32Regs:$dst,
 797                         (fdiv Float32Regs:$a, fpimm:$b))]>,
 798                       Requires<[do_DIVF32_FULL]>;
 799 //
 800 // F32 Accurate reciprocal
 801 //
 802 def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
 803                         (ins f32imm:$a, Float32Regs:$b),
 804                        "rcp.rn.ftz.f32 \t$dst, $b;",
 805                       [(set Float32Regs:$dst,
 806                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 807                       Requires<[reqPTX20, doF32FTZ]>;
 808 def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
 809                       (ins f32imm:$a, Float32Regs:$b),
 810                        "rcp.rn.f32 \t$dst, $b;",
 811                       [(set Float32Regs:$dst,
 812                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 813                       Requires<[reqPTX20]>;
 814 //
 815 // F32 Accurate division
 816 //
 817 def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
 818                       (ins Float32Regs:$a, Float32Regs:$b),
 819                       "div.rn.ftz.f32 \t$dst, $a, $b;",
 820                       [(set Float32Regs:$dst,
 821                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 822                       Requires<[doF32FTZ, reqPTX20]>;
 823 def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
 824                       (ins Float32Regs:$a, f32imm:$b),
 825                       "div.rn.ftz.f32 \t$dst, $a, $b;",
 826                       [(set Float32Regs:$dst,
 827                         (fdiv Float32Regs:$a, fpimm:$b))]>,
 828                       Requires<[doF32FTZ, reqPTX20]>;
 829 def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
 830                       (ins Float32Regs:$a, Float32Regs:$b),
 831                       "div.rn.f32 \t$dst, $a, $b;",
 832                       [(set Float32Regs:$dst,
 833                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 834                       Requires<[reqPTX20]>;
 835 def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
 836                       (ins Float32Regs:$a, f32imm:$b),
 837                       "div.rn.f32 \t$dst, $a, $b;",
 838                       [(set Float32Regs:$dst,
 839                         (fdiv Float32Regs:$a, fpimm:$b))]>,
 840                       Requires<[reqPTX20]>;
 841
 842
// Contracted f32 multiply-add: every def selects an fadd of an fmul and
// prints "<OpcStr> $dst, $a, $b, $c;". OpcStr is the opcode string and Pred
// the predicate required by each def. The suffix gives the kind of $a, $b
// and $c in order (r = register, i = immediate).
multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
   def rrr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, Float32Regs:$b),
                        Float32Regs:$c))]>, Requires<[Pred]>;
   // Workaround for an apparent TableGen bug: the commuted rule rrr2
   // (fadd $c, (fmul $a, $b)) is not generated automatically from rrr above,
   // so it is spelled out explicitly here. This happens for FMA32 only.
   // See the comments at FMAD32 and FMA32 for more information.
   def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd Float32Regs:$c,
                        (fmul Float32Regs:$a, Float32Regs:$b)))]>,
                      Requires<[Pred]>;
   def rri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
                      Requires<[Pred]>;
   def rir : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
                      Requires<[Pred]>;
   def rii : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
                      Requires<[Pred]>;
}
 879
 880 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
 881    def rrr : NVPTXInst<(outs Float64Regs:$dst),
 882                       (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
 883                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 884                       [(set Float64Regs:$dst, (fadd
 885                         (fmul Float64Regs:$a, Float64Regs:$b),
 886                         Float64Regs:$c))]>, Requires<[Pred]>;
 887    def rri : NVPTXInst<(outs Float64Regs:$dst),
 888                       (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
 889                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 890                       [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
 891                         Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
 892    def rir : NVPTXInst<(outs Float64Regs:$dst),
 893                       (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
 894                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 895                       [(set Float64Regs:$dst, (fadd
 896                         (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
 897                       Requires<[Pred]>;
 898    def rii : NVPTXInst<(outs Float64Regs:$dst),
 899                       (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
 900                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 901                       [(set Float64Regs:$dst, (fadd
 902                         (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
 903                       Requires<[Pred]>;
 904 }
 905
// For an unknown reason (most likely a bug in TableGen), TableGen does not
// automatically generate the rrr2 rule from the rrr rule (see FPCONTRACT32)
// for FMA32, though it does for FMAD32.
// If we reverse the order of the following two lines, then the rrr2 rule will
// be generated for FMA32, but not for rrr.
// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
// Because the generated rules depend on definition order, do not reorder
// these defm lines without re-checking the generated patterns.
defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
defm FMA32_ftz  : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
defm FMA32  : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
defm FMA64  : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
 917
 918 // b*c-a => fmad(b, c, -a)
 919 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
 920   def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
 921           (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
 922           Requires<[Pred]>;
 923 }
 924
 925 // a-b*c => fmad(-b,c, a)
 926 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
 927 // b*c-a => fmad(b, c, -a)
 928 // - legal because b*c-a <=> b*c+(-a)
 929 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
 930   def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
 931           (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
 932           Requires<[Pred]>;
 933   def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
 934           (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
 935           Requires<[Pred]>;
 936 }
 937
 938 // a-b*c => fmad(-b,c, a)
 939 // b*c-a => fmad(b, c, -a)
 940 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
 941   def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
 942           (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
 943           Requires<[Pred]>;
 944
 945   def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
 946           (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
 947           Requires<[Pred]>;
 948 }
 949
// Instantiate the fsub-folding patterns. Each line names the rrr instruction
// to emit and the predicate that enables the patterns. The FMA variants get
// both operand orders (a-b*c and b*c-a) under the *AGG predicates; the FMAD
// variants use the _MAD multiclass, which only provides the b*c-a form.
defm FMAF32ext_ftz  : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
defm FMAF32ext  : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
defm FMAF64ext  : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
 955
 956 def SINF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
 957                       "sin.approx.f32 \t$dst, $src;",
 958                       [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
 959 def COSF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
 960                       "cos.approx.f32 \t$dst, $src;",
 961                       [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
 962
 963 //-----------------------------------
 964 // Logical Arithmetic
 965 //-----------------------------------
 966
 967 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
 968   def b1rr:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
 969                       !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
 970                       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
 971   def b1ri:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
 972                       !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
 973                       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
 974   def b8rr:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
 975                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
 976                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
 977   def b8ri:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
 978                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
 979                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
 980   def b16rr:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
 981                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
 982                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
 983                         Int16Regs:$b))]>;
 984   def b16ri:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
 985                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
 986                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
 987   def b32rr:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
 988                       !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
 989                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
 990                         Int32Regs:$b))]>;
 991   def b32ri:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
 992                       !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
 993                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
 994   def b64rr:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
 995                       !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
 996                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
 997                         Int64Regs:$b))]>;
 998   def b64ri:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
 999                       !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
1000                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1001 }
1002
// Instantiate or/and/xor for every register width covered by LOG_FORMAT.
defm OR  : LOG_FORMAT<"or", or>;
defm AND : LOG_FORMAT<"and", and>;
defm XOR : LOG_FORMAT<"xor", xor>;
1006
// Bitwise complement for each integer register class. The 8-bit def is
// printed with the same "not.b16" instruction as the 16-bit def.
def NOT1 :
  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
            "not.pred \t$dst, $src;",
            [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
def NOT8 :
  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
            "not.b16 \t$dst, $src;",
            [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
def NOT16 :
  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
            "not.b16 \t$dst, $src;",
            [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
def NOT32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
            "not.b32 \t$dst, $src;",
            [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
def NOT64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
            "not.b64 \t$dst, $src;",
            [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
1022
// Shift instructions whose value operand is 64, 32, 16 or 8 bits wide. For
// shifts, the second source operand (the shift amount) must be a 32-bit
// value. OpcStr is completed with the operand width; the 8-bit defs are
// printed with the 16-bit opcode. i32ii takes both operands as immediates.
multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
  def i64rr :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>;
  def i64ri :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
              !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>;
  def i32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
  def i32ii :
    NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
              !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>;
  def i16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>;
  def i16ri :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
              !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>;
  def i8rr :
    NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
              [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int32Regs:$b))]>;
  def i8ri :
    NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
              !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
              [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (i32 imm:$b)))]>;
}
1065
// Left shift; LSHIFT_FORMAT appends the operand width to "shl.b".
defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1067
// Right-shift instructions whose value operand is 64, 32, 16 or 8 bits wide.
// For shifts, the second source operand (the shift amount) must be a 32-bit
// value. OpcStr is completed with the operand width. The 8-bit defs need a
// conversion first: they print CVTStr from $a into $dst and then apply the
// 16-bit shift to $dst in place.
multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
  def i64rr :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>;
  def i64ri :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
              !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>;
  def i32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
  def i32ii :
    NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
              !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>;
  def i16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>;
  def i16ri :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
              !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>;
  def i8rr :
    NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
              !strconcat(CVTStr,
                !strconcat(" \t$dst, $a;\n\t",
                  !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
              [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int32Regs:$b))]>;
  def i8ri :
    NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
              !strconcat(CVTStr,
                !strconcat(" \t$dst, $a;\n\t",
                  !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
              [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (i32 imm:$b)))]>;
}
1113
// Right shifts for the sra and srl nodes. The third argument is the cvt
// instruction that RSHIFT_FORMAT prints before the shift in its 8-bit defs
// (signed conversion for sra, unsigned for srl).
defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
1116
// 32bit
// Software rotate by immediate amounts: prints a block that shifts $src left
// by $amt1 into %lhs, right by $amt2 into %rhs, and adds the two into $dst.
// The pattern list is empty; the def is referenced from explicit Pat records.
def ROT32imm_sw :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
            !strconcat("{{\n\t",
            !strconcat(".reg .b32 %lhs;\n\t",
            !strconcat(".reg .b32 %rhs;\n\t",
            !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
            !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
            !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
                       "}}")))))),
            []>;
1128
// Transforms an immediate N into the i32 target constant (32 - N).
def SUB_FRM_32 : SDNodeXForm<imm, [{
    uint64_t Amt = N->getZExtValue();
    return CurDAG->getTargetConstant(32 - Amt, MVT::i32);
}]>;

// 32-bit rotate by an immediate. rotl passes (amt, 32 - amt) as the
// (left, right) shift amounts of ROT32imm_sw; rotr passes (32 - amt, amt).
def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src,
                       imm:$amt,
                       (SUB_FRM_32 node:$amt))>;
def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src,
                       (SUB_FRM_32 node:$amt),
                       imm:$amt)>;
1137
// Software 32-bit rotate-left by a register amount: prints a block that
// computes %lhs = $src << $amt, %amt2 = 32 - $amt, %rhs = $src >> %amt2 and
// adds %lhs and %rhs into $dst.
def ROTL32reg_sw :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$src, Int32Regs:$amt),
            !strconcat("{{\n\t",
            !strconcat(".reg .b32 %lhs;\n\t",
            !strconcat(".reg .b32 %rhs;\n\t",
            !strconcat(".reg .b32 %amt2;\n\t",
            !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
            !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
            !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
            !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
                       "}}")))))))),
            [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
1150
// Software 32-bit rotate-right by a register amount: prints a block that
// computes %lhs = $src >> $amt, %amt2 = 32 - $amt, %rhs = $src << %amt2 and
// adds %lhs and %rhs into $dst.
def ROTR32reg_sw :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$src, Int32Regs:$amt),
            !strconcat("{{\n\t",
            !strconcat(".reg .b32 %lhs;\n\t",
            !strconcat(".reg .b32 %rhs;\n\t",
            !strconcat(".reg .b32 %amt2;\n\t",
            !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
            !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
            !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
            !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
                       "}}")))))))),
            [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
1163
// 64bit
// Software rotate by immediate amounts: prints a block that shifts $src left
// by $amt1 into %lhs, right by $amt2 into %rhs, and adds the two into $dst.
// The pattern list is empty; the def is referenced from explicit Pat records.
def ROT64imm_sw :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2),
            !strconcat("{{\n\t",
            !strconcat(".reg .b64 %lhs;\n\t",
            !strconcat(".reg .b64 %rhs;\n\t",
            !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
            !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
            !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
                       "}}")))))),
            []>;
1175
// Transforms an immediate N into the i32 target constant (64 - N).
def SUB_FRM_64 : SDNodeXForm<imm, [{
    uint64_t Amt = N->getZExtValue();
    return CurDAG->getTargetConstant(64 - Amt, MVT::i32);
}]>;

// 64-bit rotate by an immediate. rotl passes (amt, 64 - amt) as the
// (left, right) shift amounts of ROT64imm_sw; rotr passes (64 - amt, amt).
def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src,
                       imm:$amt,
                       (SUB_FRM_64 node:$amt))>;
def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src,
                       (SUB_FRM_64 node:$amt),
                       imm:$amt)>;
1184
// Software 64-bit rotate-left by a 32-bit register amount: prints a block
// that computes %lhs = $src << $amt, %amt2 = 64 - $amt, %rhs = $src >> %amt2
// and adds %lhs and %rhs into $dst.
def ROTL64reg_sw :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$src, Int32Regs:$amt),
            !strconcat("{{\n\t",
            !strconcat(".reg .b64 %lhs;\n\t",
            !strconcat(".reg .b64 %rhs;\n\t",
            !strconcat(".reg .u32 %amt2;\n\t",
            !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
            !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
            !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
            !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
                       "}}")))))))),
            [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
1197
// Software 64-bit rotate-right by a 32-bit register amount: prints a block
// that computes %lhs = $src >> $amt, %amt2 = 64 - $amt, %rhs = $src << %amt2
// and adds %lhs and %rhs into $dst.
def ROTR64reg_sw :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$src, Int32Regs:$amt),
            !strconcat("{{\n\t",
            !strconcat(".reg .b64 %lhs;\n\t",
            !strconcat(".reg .b64 %rhs;\n\t",
            !strconcat(".reg .u32 %amt2;\n\t",
            !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
            !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
            !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
            !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
                       "}}")))))))),
            [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1210
1211
1212 //-----------------------------------
1213 // Data Movement (Load / Store, Move)
1214 //-----------------------------------
1215
// Two-operand address complex patterns for i32 and i64 addresses. Matching is
// delegated to SelectADDRri / SelectADDRri64, rooted at frameindex nodes.
def ADDRri :
  ComplexPattern<i32, 2, "SelectADDRri", [frameindex], [SDNPWantRoot]>;
def ADDRri64 :
  ComplexPattern<i64, 2, "SelectADDRri64", [frameindex], [SDNPWantRoot]>;
1220
// Memory operands made of a register and an immediate sub-operand, printed
// through printMemOperand. MEMri is the 32-bit form, MEMri64 the 64-bit form.
def MEMri : Operand<i32> {
  let MIOperandInfo = (ops Int32Regs, i32imm);
  let PrintMethod = "printMemOperand";
}
def MEMri64 : Operand<i64> {
  let MIOperandInfo = (ops Int64Regs, i64imm);
  let PrintMethod = "printMemOperand";
}
1229
// Pointer-typed operand printed with the generic printOperand method.
def imem : Operand<iPTR> {
  let PrintMethod = "printOperand";
}

// Same as imem, but typed iPTRAny.
def imemAny : Operand<iPTRAny> {
  let PrintMethod = "printOperand";
}

// i32 operand printed through printLdStCode.
def LdStCode : Operand<i32> {
  let PrintMethod = "printLdStCode";
}
1241
// NVPTXISD::Wrapper node: one result and one operand of the same type, with
// the result constrained to the pointer type.
def SDTWrapper :
  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
1244
// Materialize a wrapped global address (Wrapper tglobaladdr:$a) into a
// register: MOV_ADDR prints mov.u32 into an Int32Regs destination and
// MOV_ADDR64 prints mov.u64 into an Int64Regs destination.  Both take the
// address through the imem operand.
1245 def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
1246                      "mov.u32 \t$dst, $a;",
1247                      [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1248
1249 def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
1250                      "mov.u64 \t$dst, $a;",
1251                      [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1252
1253 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
// Register-to-register moves, one per register class.  All of them are
// defined with IsSimpleMove=1 and an empty selection pattern list, so none is
// chosen by pattern matching.  Note that IMOV8rr prints "mov.u16", the same
// mnemonic as IMOV16rr.
1254 let IsSimpleMove=1 in {
1255 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1256                    "mov.pred \t$dst, $sss;", []>;
1257 def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
1258                     "mov.u16 \t$dst, $sss;", []>;
1259 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1260                     "mov.u16 \t$dst, $sss;", []>;
1261 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1262                     "mov.u32 \t$dst, $sss;", []>;
1263 def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1264                     "mov.u64 \t$dst, $sss;", []>;
1265
1266 def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1267                     "mov.f32 \t$dst, $src;", []>;
1268 def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1269                     "mov.f64 \t$dst, $src;", []>;
1270 }
// Immediate-to-register moves.  Each integer variant is selected for an
// `imm` of its register class and each floating-point variant for an `fpimm`.
// IMOV8ri prints "mov.u16", the same mnemonic as IMOV16ri.  The 64-bit
// integer variant is named IMOV64i (no 'r'), unlike its siblings.
1271 def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1272                     "mov.pred \t$dst, $src;",
1273           [(set Int1Regs:$dst, imm:$src)]>;
1274 def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
1275                     "mov.u16 \t$dst, $src;",
1276           [(set Int8Regs:$dst, imm:$src)]>;
1277 def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1278                     "mov.u16 \t$dst, $src;",
1279           [(set Int16Regs:$dst, imm:$src)]>;
1280 def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1281                     "mov.u32 \t$dst, $src;",
1282           [(set Int32Regs:$dst, imm:$src)]>;
1283 def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1284                     "mov.u64 \t$dst, $src;",
1285           [(set Int64Regs:$dst, imm:$src)]>;
1286
1287 def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1288                     "mov.f32 \t$dst, $src;",
1289           [(set Float32Regs:$dst, fpimm:$src)]>;
1290 def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
1291                     "mov.f64 \t$dst, $src;",
1292           [(set Float64Regs:$dst, fpimm:$src)]>;
1293
// An i32 Wrapper around an external symbol is selected as IMOV32ri with the
// symbol as its immediate operand.
1294 def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
1295
1296 //---- Copy Frame Index ----
// Selected when a value is produced directly by the ADDRri / ADDRri64 complex
// pattern.  The register+immediate memory operand is printed with the "add"
// modifier (${addr:add}) as the source operands of an add.u32 / add.u64.
1297 def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
1298                         "add.u32 \t$dst, ${addr:add};",
1299                         [(set Int32Regs:$dst, ADDRri:$addr)]>;
1300 def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
1301                         "add.u64 \t$dst, ${addr:add};",
1302                         [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1303
1304 //-----------------------------------
1305 // Comparison and Selection
1306 //-----------------------------------
1307
1308 // Generate string block like
1309 // {
1310 //   .reg .pred p;
1311 //   setp.gt.s16 p, %a, %b;
1312 //   selp.s16 %dst, -1, 0, p;
1313 // }
1314 // when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
// Parameters:
//   OpcStr - compare mnemonic prefix; sz1 is appended directly to it.
//   sz1    - suffix of the compare (e.g. "16", or "ftz.f32" as used below).
//   sz2    - bit width appended to "selp.s" for the select.
//   d,a,b  - destination and the two source operand strings.
// The fields t1..t11 are intermediate concatenations; the finished string is
// the field `s`.  In the literals the braces are written doubled ("{{", "}}").
1315 class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
1316   string b> {
1317   string t1  = "{{\n\t.reg .pred p;\n\t";
1318   string t2  = !strconcat(t1 , OpcStr);
1319   string t3  = !strconcat(t2 , sz1);
1320   string t4  = !strconcat(t3 , " \tp, ");
1321   string t5  = !strconcat(t4 , a);
1322   string t6  = !strconcat(t5 , ", ");
1323   string t7  = !strconcat(t6 , b);
1324   string t8  = !strconcat(t7 , ";\n\tselp.s");
1325   string t9  = !strconcat(t8 , sz2);
1326   string t10 = !strconcat(t9, " \t");
1327   string t11 = !strconcat(t10, d);
1328   string s   = !strconcat(t11, ", -1, 0, p;\n\t}}");
1329 }
1330
1331 // Generate string block like
1332 // {
1333 //   .reg .pred p;
1334 //   .reg .s16 %temp1;
1335 //   .reg .s16 %temp2;
1336 //   cvt.s16.s8 %temp1, %a;
1337 //   cvt.s16.s8 %temp2, %b;
1338 //   setp.gt.s16 p, %temp1, %temp2;
1339 //   selp.s16 %dst, -1, 0, p;
1340 // }
1341 // when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
// Both sources are first converted with `cvt` into temporaries declared with
// register type `type`.  The compare suffix is always "16" and the select is
// always "selp.s16", independent of `type`.  The fields t1..t20 are
// intermediate concatenations; the finished string is the field `s`.
1342 class Set_Stri8<string OpcStr, string d, string a, string b, string type,
1343   string cvt> {
1344   string t1  = "{{\n\t.reg .pred p;\n\t";
1345   string t2  = !strconcat(t1, ".reg .");
1346   string t3  = !strconcat(t2, type);
1347   string t4  = !strconcat(t3, " %temp1;\n\t");
1348   string t5  = !strconcat(t4, ".reg .");
1349   string t6  = !strconcat(t5, type);
1350   string t7  = !strconcat(t6, " %temp2;\n\t");
1351   string t8  = !strconcat(t7, cvt);
1352   string t9  = !strconcat(t8, " \t%temp1, ");
1353   string t10 = !strconcat(t9, a);
1354   string t11 = !strconcat(t10, ";\n\t");
1355   string t12 = !strconcat(t11, cvt);
1356   string t13 = !strconcat(t12, " \t%temp2, ");
1357   string t14 = !strconcat(t13, b);
1358   string t15 = !strconcat(t14, ";\n\t");
1359   string t16 = !strconcat(t15, OpcStr);
1360   string t17 = !strconcat(t16, "16");
1361   string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
1362   string t19 = !strconcat(t18, "selp.s16 \t");
1363   string t20 = !strconcat(t19, d);
1364   string s   = !strconcat(t20, ", -1, 0, p;\n\t}}");
1365 }
1366
// Integer compare instructions for one condition code.
// Parameters:
//   OpcStr     - mnemonic prefix for the predicate-producing form; the
//                operand width ("16", "32", "64") is appended to it.
//   OpcStr_u32 - mnemonic prefix for the form that writes an Int32Regs result.
//   OpNode     - the compare PatFrag used in the selection patterns.
//   TypeStr, CVTStr - register type and convert mnemonic handed to the 8-bit
//                helpers (Set_Stri8 and Handle_i8rr/ri/ir; the Handle_* classes
//                are defined outside this section).
// Three groups of records are produced:
//   *_toi8/16/32/64 - result in a register of the operand's class, built from
//                     Set_Stri8 / Set_Str; these have no selection pattern.
//   *_p             - result in Int1Regs, for rr, ri and ir operand forms.
//   *_u32           - result in Int32Regs, for rr, ri and ir operand forms.
1367 multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
1368   string TypeStr, string CVTStr> {
1369   def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1370                      Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
1371                []>;
1372   def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1373       Int16Regs:$b),
1374                      Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
1375                []>;
1376   def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1377       Int32Regs:$b),
1378                      Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
1379                []>;
1380   def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1381       Int64Regs:$b),
1382                      Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
1383                []>;
1384
1385   def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1386                      Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
1387                [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
1388   def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
1389                      Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
1390                [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
1391   def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
1392                      Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
1393                [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
1394   def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
1395                  !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1396                [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1397   def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1398                  !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1399                [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1400   def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1401                  !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1402                [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1403   def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
1404                  !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1405                [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1406   def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1407                  !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1408                [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1409   def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1410                  !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1411                [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1412   def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
1413                  !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1414                [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1415   def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1416                  !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1417                [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1418   def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1419                  !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1420                [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
1421
1422   def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1423                      Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
1424                [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
1425   def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
1426                      Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
1427                [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
1428   def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
1429                      Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
1430                [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
1431   def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
1432       Int16Regs:$b),
1433                  !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1434                [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1435   def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1436                  !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1437                [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1438   def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1439                  !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1440                [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1441   def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1442       Int32Regs:$b),
1443                  !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1444                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1445   def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1446                  !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1447                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1448   def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1449                  !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1450                [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1451   def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
1452       Int64Regs:$b),
1453                  !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1454                [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1455   def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1456                  !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1457                [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1458   def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1459                  !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1460                [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
1461 }
1462
// Floating-point compare instructions for one condition code.
// Parameters:
//   OpcStr     - mnemonic prefix for the predicate-producing form; the type
//                suffix ("f32", "ftz.f32", "f64") is appended to it.
//   OpcStr_u32 - mnemonic prefix for the form that writes an Int32Regs result.
//   OpNode     - the compare PatFrag used in the selection patterns.
// Three groups of records are produced:
//   *_toi32/_toi64 - built from Set_Str; these have no selection pattern.
//   *_p            - result in Int1Regs, for rr, ri and ir operand forms.
//   *_u32          - result in Int32Regs, for rr, ri and ir operand forms.
// Every *_ftz record prints the ".ftz" form and is guarded by
// Requires<[doF32FTZ]>.  The *_u32_ftz records share their selection pattern
// with the plain *_u32 records that follow them, so they carry the same guard
// as the *_p_ftz records; without it the ".ftz" form would be selectable even
// when doF32FTZ does not hold.
1463 multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
1464   def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1465       Float32Regs:$b),
1466                      Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
1467                []>, Requires<[doF32FTZ]>;
1468   def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1469       Float32Regs:$b),
1470                      Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
1471                []>;
1472   def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
1473       Float64Regs:$b),
1474                      Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
1475                []>;
1476   def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
1477       Float64Regs:$b),
1478                      Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
1479                []>;
1480
1481   def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a
1482       , Float32Regs:$b),
1483                  !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1484                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>
1485   , Requires<[doF32FTZ]>;
1486   def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
1487     (ins Float32Regs:$a, Float32Regs:$b),
1488                  !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1489                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1490   def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1491     (ins Float32Regs:$a, f32imm:$b),
1492                  !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1493                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
1494   Requires<[doF32FTZ]>;
1495   def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
1496                  !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1497                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1498   def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1499     (ins f32imm:$a, Float32Regs:$b),
1500                  !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1501                [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
1502   Requires<[doF32FTZ]>;
1503   def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
1504                  !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1505                [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1506   def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
1507     (ins Float64Regs:$a, Float64Regs:$b),
1508                  !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1509                [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1510   def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
1511                  !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1512                [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1513   def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
1514                  !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1515                [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
1516
1517   def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1518     (ins Float32Regs:$a, Float32Regs:$b),
1519                  !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1520                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
       Requires<[doF32FTZ]>;
1521   def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1522     (ins Float32Regs:$a, Float32Regs:$b),
1523                  !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1524                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1525   def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1526     (ins Float32Regs:$a, f32imm:$b),
1527                  !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1528                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
       Requires<[doF32FTZ]>;
1529   def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1530     (ins Float32Regs:$a, f32imm:$b),
1531                  !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1532                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1533   def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1534     (ins f32imm:$a, Float32Regs:$b),
1535                  !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1536                [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
       Requires<[doF32FTZ]>;
1537   def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1538     (ins f32imm:$a, Float32Regs:$b),
1539                  !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1540                [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1541   def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1542     (ins Float64Regs:$a, Float64Regs:$b),
1543                  !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1544                [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1545   def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1546     (ins Float64Regs:$a, f64imm:$b),
1547                  !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1548                [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1549   def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1550     (ins f64imm:$a, Float64Regs:$b),
1551                  !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1552                [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
1553 }
1554
// Instantiate ISET_FORMAT once per integer condition code.  The "S" variants
// use the ".s" mnemonics with TypeStr "s16" and CVTStr "cvt.s16.s8"; the "U"
// variants use the ".u" mnemonics with "u16" and "cvt.u16.u8".
1555 defm ISetSGT
1556 : ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
1557 defm ISetUGT
1558 : ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
1559 defm ISetSLT
1560 : ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
1561 defm ISetULT
1562 : ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
1563 defm ISetSGE
1564 : ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
1565 defm ISetUGE
1566 : ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
1567 defm ISetSLE
1568 : ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
1569 defm ISetULE
1570 : ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
1571 defm ISetSEQ
1572 : ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
1573 defm ISetUEQ
1574 : ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
1575 defm ISetSNE
1576 : ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
1577 defm ISetUNE
1578 : ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;
1579
// Compare two predicates (i1) and produce a predicate.
//   setne / setune - printed as a single xor.pred.
//   seteq / setueq - printed as xor.pred into a scratch predicate `temp`
//                    followed by not.pred into $dst.
1580 def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1581   (ins Int1Regs:$a, Int1Regs:$b),
1582                       "xor.pred \t$dst, $a, $b;",
1583             [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
1584 def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1585   (ins Int1Regs:$a, Int1Regs:$b),
1586                       "xor.pred \t$dst, $a, $b;",
1587             [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
1588 def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1589   (ins Int1Regs:$a, Int1Regs:$b),
1590             !strconcat("{{\n\t",
1591             !strconcat(".reg .pred temp;\n\t",
1592             !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1593             !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1594             [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
1595 def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1596   (ins Int1Regs:$a, Int1Regs:$b),
1597             !strconcat("{{\n\t",
1598             !strconcat(".reg .pred temp;\n\t",
1599             !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1600             !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1601             [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
1602
1603 // Compare 2 i1's and produce a u32
// Both forms xor the inputs into a scratch predicate `temp` and then pick the
// result with selp.u32: the setne form yields -1 when temp is set and 0
// otherwise; the seteq form swaps the two constants (0 when set, -1 otherwise).
1604 def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
1605   (ins Int1Regs:$a, Int1Regs:$b),
1606                   !strconcat("{{\n\t",
1607                   !strconcat(".reg .pred temp;\n\t",
1608                   !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1609                   !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))),
1610                   [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
1611 def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
1612   (ins Int1Regs:$a, Int1Regs:$b),
1613                   !strconcat("{{\n\t",
1614                   !strconcat(".reg .pred temp;\n\t",
1615                   !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1616                   !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))),
1617                   [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
1618
// Instantiate FSET_FORMAT once per floating-point condition code, in three
// groups: the plain mnemonics (gt, lt, ge, le, eq, ne) paired with the seto*
// PatFrags, the "u"-suffixed mnemonics (gtu, ltu, ...) paired with the setu*
// PatFrags, and num / nan paired with seto / setuo.
1619 defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>;
1620 defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>;
1621 defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>;
1622 defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>;
1623 defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>;
1624 defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>;
1625
1626 defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
1627 defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>;
1628 defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>;
1629 defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>;
1630 defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>;
1631 defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>;
1632
1633 defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>;
1634 defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>;
1635
// Select on predicates and on 8-bit values.
// SELECTi1rr is a pattern only: an i1 select is rewritten as
//   (p AND a) OR ((NOT p) AND b)
// using ORb1rr, ANDb1rr and NOT1.
// SELECTi8{rr,ri,ir,ii} cover register/immediate combinations of the two
// values.  The instruction operands are ordered ($a, $b, $p) while the matched
// node is (select $p, $a, $b).  The 8-bit forms print "selp.b16".
1636 def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
1637                      (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
1638                              (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
1639 def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst),
1640   (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
1641                       "selp.b16 \t$dst, $a, $b, $p;",
1642       [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
1643 def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst),
1644   (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
1645                       "selp.b16 \t$dst, $a, $b, $p;",
1646       [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
1647 def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst),
1648   (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
1649                       "selp.b16 \t$dst, $a, $b, $p;",
1650       [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
1651 def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst),
1652   (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
1653                       "selp.b16 \t$dst, $a, $b, $p;",
1654       [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
1655
// 16-bit integer select, printed as "selp.b16", in the four
// register/immediate combinations (rr, ri, ir, ii) of the two values.
// Instruction operands are ordered ($a, $b, $p); the matched node is
// (select $p, $a, $b).
1656 def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst),
1657   (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
1658                       "selp.b16 \t$dst, $a, $b, $p;",
1659       [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
1660 def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst),
1661   (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
1662                       "selp.b16 \t$dst, $a, $b, $p;",
1663       [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
1664 def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst),
1665   (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
1666                       "selp.b16 \t$dst, $a, $b, $p;",
1667       [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
1668 def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst),
1669   (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
1670                       "selp.b16 \t$dst, $a, $b, $p;",
1671       [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
1672
// 32-bit integer select, printed as "selp.b32", in the four
// register/immediate combinations (rr, ri, ir, ii) of the two values.
// Instruction operands are ordered ($a, $b, $p); the matched node is
// (select $p, $a, $b).
1673 def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst),
1674   (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
1675                       "selp.b32 \t$dst, $a, $b, $p;",
1676       [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
1677 def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst),
1678   (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
1679                       "selp.b32 \t$dst, $a, $b, $p;",
1680       [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
1681 def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst),
1682   (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
1683                       "selp.b32 \t$dst, $a, $b, $p;",
1684       [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
1685 def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst),
1686   (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
1687                       "selp.b32 \t$dst, $a, $b, $p;",
1688       [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
1689
// 64-bit integer select, printed as "selp.b64", in the four
// register/immediate combinations (rr, ri, ir, ii) of the two values.
// Instruction operands are ordered ($a, $b, $p); the matched node is
// (select $p, $a, $b).
1690 def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst),
1691   (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
1692                       "selp.b64 \t$dst, $a, $b, $p;",
1693       [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
1694 def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst),
1695   (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
1696                       "selp.b64 \t$dst, $a, $b, $p;",
1697       [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
1698 def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst),
1699   (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
1700                       "selp.b64 \t$dst, $a, $b, $p;",
1701       [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
1702 def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst),
1703   (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
1704                       "selp.b64 \t$dst, $a, $b, $p;",
1705       [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
1706
// 32-bit floating-point select, printed as "selp.f32", in the four
// register/immediate combinations (rr, ri, ir, ii) of the two values;
// immediates are matched as fpimm.  Instruction operands are ordered
// ($a, $b, $p); the matched node is (select $p, $a, $b).
1707 def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst),
1708   (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
1709                       "selp.f32 \t$dst, $a, $b, $p;",
1710       [(set Float32Regs:$dst,
1711         (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
1712 def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst),
1713   (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
1714                       "selp.f32 \t$dst, $a, $b, $p;",
1715       [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
1716 def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst),
1717   (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
1718                       "selp.f32 \t$dst, $a, $b, $p;",
1719       [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
1720 def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst),
1721   (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
1722                       "selp.f32 \t$dst, $a, $b, $p;",
1723       [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
1724
// 64-bit floating-point select, printed as "selp.f64", in the four
// register/immediate combinations (rr, ri, ir, ii) of the two values;
// immediates are matched as fpimm.  Instruction operands are ordered
// ($a, $b, $p); the matched node is (select $p, $a, $b).
// NOTE(review): the SELECTf64ii asm string has a space after the tab
// ("\t $dst"), unlike every sibling; left untouched here.
1725 def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst),
1726   (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
1727                       "selp.f64 \t$dst, $a, $b, $p;",
1728       [(set Float64Regs:$dst,
1729         (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
1730 def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst),
1731   (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
1732                       "selp.f64 \t$dst, $a, $b, $p;",
1733       [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
1734 def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst),
1735   (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
1736                       "selp.f64 \t$dst, $a, $b, $p;",
1737       [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
1738 def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst),
1739   (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
1740                       "selp.f64 \t $dst, $a, $b, $p;",
1741       [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
1742
1743 //def ld_param         : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1744 //                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
1745
// Type profiles for the NVPTXISD nodes defined below.  Each is
// SDTypeProfile<number of results, number of operands, constraints>, where
// SDTCisInt<N> requires value N to be of integer type.
// Several profiles are identical in content and differ only by name:
// SDTDeclareParamProfile / SDTDeclareScalarParamProfile,
// SDTPrintCallProfile / SDTPrintCallUniProfile, and
// SDTStoreParamProfile / SDTStoreParam32Profile.
1746 def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
1747   SDTCisInt<2>]>;
1748 def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
1749   SDTCisInt<1>, SDTCisInt<2>]>;
1750 def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
1751 def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1752 def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1753 def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1754 def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1755 def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1756 def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
1757 def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
1758 def SDTCallValProfile : SDTypeProfile<1, 0, []>;
1759 def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
1760 def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>;
1761 def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1762 def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
1763
// TableGen names for the target-specific DAG nodes in the NVPTXISD namespace,
// each bound to one of the type profiles above.  Property sets used here:
//   - most nodes: SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect
//   - LoadParam: SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue
//   - MoveRetval, StoreRetval, MoveToRetval, RETURNNode:
//       SDNPHasChain, SDNPSideEffect (no glue)
//   - MoveParam: no properties at all
// CallArgEnd, CallVoid and Prototype all use SDTCallVoidProfile; RETURNNode
// reuses SDTCallArgMarkProfile (no results, no operands).
1764 def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
1765                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1766 def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
1767   SDTDeclareScalarParamProfile,
1768                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1769 def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
1770   SDTDeclareParamProfile,
1771                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1772 def DeclareRet   : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
1773                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1774 def LoadParam    : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
1775                          [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1776 def PrintCall    : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
1777                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1778 def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
1779                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1780 def StoreParam   : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
1781                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1782 def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
1783                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1784 def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
1785                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1786 def MoveToParam  : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile,
1787                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1788 def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
1789                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1790 def CallArg      : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
1791                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1792 def LastCallArg  : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
1793                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1794 def CallArgEnd   : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
1795                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1796 def CallVoid     : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
1797                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1798 def Prototype    : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
1799                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1800 def CallVal      : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
1801                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1802 def MoveParam    : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
1803                          []>;
1804 def MoveRetval   : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
1805                          [SDNPHasChain, SDNPSideEffect]>;
1806 def StoreRetval  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
1807                          [SDNPHasChain, SDNPSideEffect]>;
1808 def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
1809                          [SDNPHasChain, SDNPSideEffect]>;
1810 def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
1811   SDTPseudoUseParamProfile,
1812                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1813 def RETURNNode   : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
1814                          [SDNPHasChain, SDNPSideEffect]>;
1815
// Instruction class for the (LoadParam 1, $b) form.
//   regclass - register class of the result $dst.
//   opstr    - type suffix appended to "ld.param" (the defs in this file pass
//              values such as ".b32" or ".f64").
// Prints "ld.param<opstr>\t$dst, [retval0+$b];", i.e. the immediate $b is
// printed as an offset added to retval0.
1816 class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
1817       NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
1818                 !strconcat(!strconcat("ld.param", opstr),
1819                 "\t$dst, [retval0+$b];"),
1820                 [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
1821
// Instruction class for the (LoadParam 0, $b) form.
//   regclass - register class of the result $dst.
//   opstr    - type suffix appended to "mov".
// Prints "mov<opstr>\t$dst, retval$b;": here the immediate $b is printed as
// the numeric suffix of the retval name, not as an offset.
1822 class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
1823       NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
1824                 !strconcat(!strconcat("mov", opstr),
1825                 "\t$dst, retval$b;"),
1826                 [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
1827
// Instruction class matching (StoreParam $a, $b, $val).
//   regclass - register class of the stored value $val.
//   opstr    - type suffix appended to "st.param".
// Prints "st.param<opstr>\t[param$a+$b], $val;": $a is printed as the suffix
// of the param name and $b as the offset added to it.
1828 class StoreParamInst<NVPTXRegClass regclass, string opstr> :
1829       NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
1830                 !strconcat(!strconcat("st.param", opstr),
1831                 "\t[param$a+$b], $val;"),
1832                 [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
1833
// Instruction class matching (MoveToParam $a, $b, $val).
//   regclass - register class of the moved value $val.
//   opstr    - type suffix appended to "mov".
// Prints "mov<opstr>\tparam$a, $val;". The immediate $b is an input operand
// and part of the pattern, but it does not appear in the printed text.
1834 class MoveToParamInst<NVPTXRegClass regclass, string opstr> :
1835       NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
1836                 !strconcat(!strconcat("mov", opstr),
1837                 "\tparam$a, $val;"),
1838                 [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
1839
// Instruction class matching (StoreRetval $a, $val).
//   regclass - register class of the stored value $val.
//   opstr    - type suffix appended to "st.param".
// Prints "st.param<opstr>\t[func_retval0+$a], $val;": the target name is
// always func_retval0 and $a is printed as the offset.
1840 class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
1841       NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
1842                 !strconcat(!strconcat("st.param", opstr),
1843                 "\t[func_retval0+$a], $val;"),
1844                 [(StoreRetval (i32 imm:$a), regclass:$val)]>;
1845
// Instruction class matching (MoveToRetval $num, $val).
//   regclass - register class of the moved value $val.
//   opstr    - type suffix appended to "mov".
// Prints "mov<opstr>\tfunc_retval$num, $val;": $num is printed as the suffix
// of the func_retval name. Note the operand order here is ($num, $val),
// unlike StoreRetvalInst where the value comes first.
1846 class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> :
1847       NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
1848                 !strconcat(!strconcat("mov", opstr),
1849                 "\tfunc_retval$num, $val;"),
1850                 [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
1851
// Instruction class matching (MoveRetval $val).
//   regclass - register class of the moved value $val.
//   opstr    - type suffix appended to "mov".
// Prints "mov<opstr>\tfunc_retval0, $val;"; the destination name is fixed.
1852 class MoveRetvalInst<NVPTXRegClass regclass, string opstr> :
1853       NVPTXInst<(outs), (ins regclass:$val),
1854                 !strconcat(!strconcat("mov", opstr),
1855                 "\tfunc_retval0, $val;"),
1856                 [(MoveRetval regclass:$val)]>;
1857
// Operand-less records that print the opening text of a call. Each one
// matches PrintCall with a constant N (0..8): for N > 0 the text is "call "
// followed by a parenthesized list retval0 .. retval(N-1) and ", "; for N == 0
// it is just "call ". None of the strings terminates the statement.
// NOTE(review): the remainder of the call is presumably printed by the
// CallVoid / CallArg* / Prototype records later in this file - confirm.
1858 def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
1859 "call (retval0), ",
1860                                 [(PrintCall (i32 1))]>;
1861 def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
1862 "call (retval0, retval1), ",
1863                                 [(PrintCall (i32 2))]>;
1864 def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
1865 "call (retval0, retval1, retval2), ",
1866                                 [(PrintCall (i32 3))]>;
1867 def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
1868 "call (retval0, retval1, retval2, retval3), ",
1869                                 [(PrintCall (i32 4))]>;
1870 def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
1871 "call (retval0, retval1, retval2, retval3, retval4), ",
1872                                 [(PrintCall (i32 5))]>;
1873 def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
1874 "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
1875                                 [(PrintCall (i32 6))]>;
1876 def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
1877 "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
1878                                 [(PrintCall (i32 7))]>;
// The 8-value string is assembled with !strconcat only to split it over two
// source lines; the result follows the same shape as the ones above.
1879 def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
1880 !strconcat("call (retval0, retval1, retval2, retval3, retval4",
1881            ", retval5, retval6, retval7), "),
1882                                 [(PrintCall (i32 8))]>;
1883
// N == 0: no return-value list is printed.
1884 def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ",
1885                                 [(PrintCall (i32 0))]>;
1886
// Same family as the PrintCall records, but matching the PrintCallUni node
// and printing "call.uni " instead of "call ". Each record matches a constant
// N (0..8) and, for N > 0, prints the list retval0 .. retval(N-1) in
// parentheses followed by ", ".
1887 def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
1888 "call.uni (retval0), ",
1889                                 [(PrintCallUni (i32 1))]>;
1890 def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
1891 "call.uni (retval0, retval1), ",
1892                                 [(PrintCallUni (i32 2))]>;
1893 def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
1894 "call.uni (retval0, retval1, retval2), ",
1895                                 [(PrintCallUni (i32 3))]>;
1896 def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
1897 "call.uni (retval0, retval1, retval2, retval3), ",
1898                                 [(PrintCallUni (i32 4))]>;
1899 def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
1900 "call.uni (retval0, retval1, retval2, retval3, retval4), ",
1901                                 [(PrintCallUni (i32 5))]>;
1902 def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
1903 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
1904                                 [(PrintCallUni (i32 6))]>;
1905 def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
1906 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
1907                                 [(PrintCallUni (i32 7))]>;
// Split with !strconcat purely to keep the source line short.
1908 def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
1909 !strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
1910            ", retval5, retval6, retval7), "),
1911                                 [(PrintCallUni (i32 8))]>;
1912
// N == 0: no return-value list is printed.
1913 def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
1914                                 [(PrintCallUni (i32 0))]>;
1915
// Concrete LoadParam instructions.
// "Mem" variants (LoadParam 1, ...) are all instances of LoadParamMemInst and
// print ld.param with the suffix given here.
1916 def LoadParamMemI64    : LoadParamMemInst<Int64Regs, ".b64">;
1917 def LoadParamMemI32    : LoadParamMemInst<Int32Regs, ".b32">;
1918 def LoadParamMemI16    : LoadParamMemInst<Int16Regs, ".b16">;
1919 def LoadParamMemI8     : LoadParamMemInst<Int8Regs, ".b8">;
1920
// Disabled alternative definitions of LoadParamMemI16 / LoadParamMemI8 that
// loaded a .b32 into temp_param_reg and then converted with cvt.u16.u32. The
// active definitions above use ld.param.b16 / ld.param.b8 directly.
1921 //def LoadParamMemI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
1922 //                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
1923 //                "cvt.u16.u32\t$dst, temp_param_reg;"),
1924 //                [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
1925 //def LoadParamMemI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
1926 //                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
1927 //                "cvt.u16.u32\t$dst, temp_param_reg;"),
1928 //                [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
1929
1930 def LoadParamMemF32    : LoadParamMemInst<Float32Regs, ".f32">;
1931 def LoadParamMemF64    : LoadParamMemInst<Float64Regs, ".f64">;
1932
// "Reg" variants (LoadParam 0, ...). The 64/32-bit integer and the float
// versions are instances of LoadParamRegInst (a mov from retval$b).
1933 def LoadParamRegI64    : LoadParamRegInst<Int64Regs, ".b64">;
1934 def LoadParamRegI32    : LoadParamRegInst<Int32Regs, ".b32">;
// The 16- and 8-bit versions are written out by hand because they print
// "cvt.u16.u32" instead of a mov; both use the same asm string.
1935 def LoadParamRegI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
1936                          "cvt.u16.u32\t$dst, retval$b;",
1937                          [(set Int16Regs:$dst,
1938                            (LoadParam (i32 0), (i32 imm:$b)))]>;
1939 def LoadParamRegI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
1940                          "cvt.u16.u32\t$dst, retval$b;",
1941                          [(set Int8Regs:$dst,
1942                            (LoadParam (i32 0), (i32 imm:$b)))]>;
1943
1944 def LoadParamRegF32    : LoadParamRegInst<Float32Regs, ".f32">;
1945 def LoadParamRegF64    : LoadParamRegInst<Float64Regs, ".f64">;
1946
// Concrete StoreParam instructions: "st.param<type> [param$a+$b], $val;".
1947 def StoreParamI64    : StoreParamInst<Int64Regs, ".b64">;
1948 def StoreParamI32    : StoreParamInst<Int32Regs, ".b32">;
1949
// The 16- and 8-bit versions are spelled out instead of instantiating
// StoreParamInst, but they use the same operands and pattern shape and print
// the same text that StoreParamInst would with ".b16" / ".b8".
1950 def StoreParamI16    : NVPTXInst<(outs),
1951   (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1952                        "st.param.b16\t[param$a+$b], $val;",
1953            [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1954
1955 def StoreParamI8     : NVPTXInst<(outs),
1956   (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1957                        "st.param.b8\t[param$a+$b], $val;",
1958                        [(StoreParam
1959                          (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
1960
// StoreParamS32 / StoreParamU32 variants: two printed instructions. $val is
// first converted to 32 bits into temp_param_reg (cvt.s32.* for the S32 node,
// cvt.u32.* for the U32 node) and then stored with st.param.b32.
// temp_param_reg is not declared by these records.
// NOTE(review): presumably it is declared elsewhere by the asm printer -
// confirm.
1961 def StoreParamS32I16 : NVPTXInst<(outs),
1962   (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1963                  !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
1964                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
1965                  [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1966 def StoreParamU32I16 : NVPTXInst<(outs),
1967   (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1968                  !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
1969                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
1970                  [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1971
1972 def StoreParamU32I8   : NVPTXInst<(outs),
1973   (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1974                  !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
1975                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
1976                  [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
1977 def StoreParamS32I8   : NVPTXInst<(outs),
1978   (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1979                  !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
1980                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
1981                  [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
1982
1983 def StoreParamF32    : StoreParamInst<Float32Regs, ".f32">;
1984 def StoreParamF64    : StoreParamInst<Float64Regs, ".f64">;
1985
// Concrete MoveToParam instructions. The 64/32-bit integer and the float
// versions are instances of MoveToParamInst ("mov<type> param$a, $val;").
1986 def MoveToParamI64   : MoveToParamInst<Int64Regs, ".b64">;
1987 def MoveToParamI32   : MoveToParamInst<Int32Regs, ".b32">;
1988 def MoveToParamF64   : MoveToParamInst<Float64Regs, ".f64">;
1989 def MoveToParamF32   : MoveToParamInst<Float32Regs, ".f32">;
// The 16- and 8-bit versions print two instructions: a cvt.u32.u16 of $val
// into temp_param_reg, then "mov.b32 param$a, temp_param_reg;". As in
// MoveToParamInst, $b is matched but not printed.
// NOTE(review): the Int8Regs version also prints cvt.u32.u16 (not .u8),
// unlike StoreParamU32I8 - confirm this is intended.
1990 def MoveToParamI16   : NVPTXInst<(outs),
1991   (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1992                    !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
1993                               "mov.b32\tparam$a, temp_param_reg;"),
1994                    [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1995 def MoveToParamI8    : NVPTXInst<(outs),
1996   (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1997                    !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
1998                               "mov.b32\tparam$a, temp_param_reg;"),
1999                    [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
2000
// Concrete StoreRetval instructions; every active definition is an instance
// of StoreRetvalInst ("st.param<type> [func_retval0+$a], $val;").
2001 def StoreRetvalI64    : StoreRetvalInst<Int64Regs, ".b64">;
2002 def StoreRetvalI32    : StoreRetvalInst<Int32Regs, ".b32">;
2003 def StoreRetvalI16    : StoreRetvalInst<Int16Regs, ".b16">;
2004 def StoreRetvalI8     : StoreRetvalInst<Int8Regs, ".b8">;
2005
// Disabled alternative definitions of StoreRetvalI16 / StoreRetvalI8 that
// declared a scoped .b32 temp_retval_reg, widened $val with cvt.u32.u16 and
// stored it with st.param.b32. The active definitions above store the .b16 /
// .b8 value directly.
2006 //def StoreRetvalI16    : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
2007 //     !strconcat("\{\n\t",
2008 //     !strconcat(".reg .b32 temp_retval_reg;\n\t",
2009 //     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
2010 //                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
2011 //     [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
2012 //def StoreRetvalI8     : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
2013 //     !strconcat("\{\n\t",
2014 //     !strconcat(".reg .b32 temp_retval_reg;\n\t",
2015 //     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
2016 //                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
2017 //     [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
2018
2019 def StoreRetvalF64    : StoreRetvalInst<Float64Regs, ".f64">;
2020 def StoreRetvalF32    : StoreRetvalInst<Float32Regs, ".f32">;
2021
// Concrete MoveRetval instructions; all are instances of MoveRetvalInst and
// print "mov<type> func_retval0, $val;".
// NOTE(review): MoveRetvalI8 therefore prints "mov.b8"; check against the PTX
// ISA whether mov accepts an 8-bit type, and whether this record is ever
// selected - TODO confirm.
2022 def MoveRetvalI64    : MoveRetvalInst<Int64Regs, ".b64">;
2023 def MoveRetvalI32    : MoveRetvalInst<Int32Regs, ".b32">;
2024 def MoveRetvalI16    : MoveRetvalInst<Int16Regs, ".b16">;
2025 def MoveRetvalI8     : MoveRetvalInst<Int8Regs, ".b8">;
2026 def MoveRetvalF64    : MoveRetvalInst<Float64Regs, ".f64">;
2027 def MoveRetvalF32    : MoveRetvalInst<Float32Regs, ".f32">;
2028
// Concrete MoveToRetval instructions. The 64/32-bit integer and the float
// versions are instances of MoveToRetvalInst
// ("mov<type> func_retval$num, $val;").
2029 def MoveToRetvalI64    : MoveToRetvalInst<Int64Regs, ".b64">;
2030 def MoveToRetvalI32    : MoveToRetvalInst<Int32Regs, ".b32">;
2031 def MoveToRetvalF64    : MoveToRetvalInst<Float64Regs, ".f64">;
2032 def MoveToRetvalF32    : MoveToRetvalInst<Float32Regs, ".f32">;
// The 16- and 8-bit versions are written by hand: they print a single
// "cvt.u32.u16" straight into func_retval$num instead of a mov. Both register
// classes use the same asm string.
2033 def MoveToRetvalI16    : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
2034                          "cvt.u32.u16\tfunc_retval$num, $val;",
2035                          [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
2036 def MoveToRetvalI8     : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
2037                          "cvt.u32.u16\tfunc_retval$num, $val;",
2038                          [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
2039
// Operand-less punctuation records for the call argument list and the return:
//   CallArgBegin       prints "("
//   CallArgEnd (i32 1) prints ");"  (closes the list and ends the statement)
//   CallArgEnd (i32 0) prints ")"   (closes the list only)
//   RETURNNode         prints "ret;"
2040 def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
2041 def CallArgEndInst1  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
2042 def CallArgEndInst0  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
2043 def RETURNInst       : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
2044
// Instruction class matching (CallArg 0, $a) for a register argument of class
// regclass. Prints the register followed by ", " (i.e. a non-final argument).
2045 class CallArgInst<NVPTXRegClass regclass> :
2046       NVPTXInst<(outs), (ins regclass:$a), "$a, ",
2047                 [(CallArg (i32 0), regclass:$a)]>;
2048
// Instruction class matching (LastCallArg 0, $a) for a register argument of
// class regclass. Prints just the register, with no trailing separator.
2049 class LastCallArgInst<NVPTXRegClass regclass> :
2050       NVPTXInst<(outs), (ins regclass:$a), "$a",
2051                 [(LastCallArg (i32 0), regclass:$a)]>;
2052
// Register call-argument instructions: one CallArgInst (prints "$a, ") and one
// LastCallArgInst (prints "$a") per integer and float register class.
2053 def CallArgI64     : CallArgInst<Int64Regs>;
2054 def CallArgI32     : CallArgInst<Int32Regs>;
2055 def CallArgI16     : CallArgInst<Int16Regs>;
2056 def CallArgI8      : CallArgInst<Int8Regs>;
2057
2058 def CallArgF64     : CallArgInst<Float64Regs>;
2059 def CallArgF32     : CallArgInst<Float32Regs>;
2060
2061 def LastCallArgI64 : LastCallArgInst<Int64Regs>;
2062 def LastCallArgI32 : LastCallArgInst<Int32Regs>;
2063 def LastCallArgI16 : LastCallArgInst<Int16Regs>;
2064 def LastCallArgI8  : LastCallArgInst<Int8Regs>;
2065
2066 def LastCallArgF64 : LastCallArgInst<Float64Regs>;
2067 def LastCallArgF32 : LastCallArgInst<Float32Regs>;
2068
// Call arguments given as an i32 immediate rather than a register.
// With first pattern operand 0 the immediate itself is printed ("$a, " for
// CallArg, "$a" for LastCallArg).
2069 def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
2070                               [(CallArg (i32 0), (i32 imm:$a))]>;
2071 def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
2072                               [(LastCallArg (i32 0), (i32 imm:$a))]>;
2073
// With first pattern operand 1 the immediate is printed as the numeric suffix
// of a param name ("param$a, " for CallArg, "param$a" for LastCallArg).
2074 def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
2075                              [(CallArg (i32 1), (i32 imm:$a))]>;
2076 def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
2077                              [(LastCallArg (i32 1), (i32 imm:$a))]>;
2078
// CallVoid records print the call target followed by ", ". Three forms of
// target are matched: a global address wrapped in Wrapper (imem operand), an
// Int32Regs register, and an Int64Regs register.
2079 def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
2080                              "$addr, ",
2081                              [(CallVoid (Wrapper tglobaladdr:$addr))]>;
2082 def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
2083                              "$addr, ",
2084                              [(CallVoid Int32Regs:$addr)]>;
2085 def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
2086                              "$addr, ",
2087                              [(CallVoid Int64Regs:$addr)]>;
// Matches (Prototype $val) and prints ", prototype_$val;" - the immediate is
// the numeric suffix of the prototype name and the ';' ends the statement.
2088 def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val),
2089                              ", prototype_$val;",
2090                              [(Prototype (i32 imm:$val))]>;
2091
// Declarations of the retval$num names used by the call sequence.
// DeclareRetParam prints an aligned byte array:
//   ".param .align $align .b8 retval$num[$size];"
2092 def DeclareRetMemInst : NVPTXInst<(outs),
2093   (ins i32imm:$align, i32imm:$size, i32imm:$num),
2094          ".param .align $align .b8 retval$num[$size];",
2095          [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
// DeclareRet is selected on its first (constant) operand:
//   1 -> ".param .b$size retval$num;"
//   2 -> ".reg .b$size retval$num;"
// $size is printed directly after ".b", so it forms the type suffix.
2096 def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
2097          ".param .b$size retval$num;",
2098          [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
2099 def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
2100          ".reg .b$size retval$num;",
2101          [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
2102
// Declarations of the param$a names used by the call sequence.
// DeclareParam prints an aligned byte array:
//   ".param .align $align .b8 param$a[$size];"
2103 def DeclareParamInst : NVPTXInst<(outs),
2104   (ins i32imm:$align, i32imm:$a, i32imm:$size),
2105          ".param .align $align .b8 param$a[$size];",
2106          [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
// DeclareScalarParam is selected on its third (constant) operand:
//   0 -> ".param .b$size param$a;"
//   1 -> ".reg .b$size param$a;"
// Note this selector encoding (0/1, last operand) differs from DeclareRet,
// which uses 1/2 in the first operand.
2107 def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
2108          ".param .b$size param$a;",
2109          [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
2110 def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
2111          ".reg .b$size param$a;",
2112          [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
2113
// Instruction class matching (set $dst, (MoveParam $src)) with source and
// destination in the same register class.
//   regclass - register class of both $src and $dst.
//   asmstr   - type suffix appended to "mov".
// Prints "mov<asmstr>\t$dst, $src;".
2114 class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
2115       NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
2116                 !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"),
2117                 [(set regclass:$dst, (MoveParam regclass:$src))]>;
2118
// Concrete MoveParam instructions. The 64/32-bit integer and the float
// versions are instances of MoveParamInst.
2119 def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
2120 def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
// The 16- and 8-bit versions are written by hand and print "cvt.u16.u32"
// instead of a mov; both use the same asm string.
2121 def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
2122                    "cvt.u16.u32\t$dst, $src;",
2123                    [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
2124 def MoveParamI8  : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
2125                    "cvt.u16.u32\t$dst, $src;",
2126                    [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
2127 def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
2128 def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
2129
// Instruction class matching (PseudoUseParam $src). Its asm string begins
// with "//", so the printed text is "// Pseudo use of $src" and contains no
// opcode.
2130 class PseudoUseParamInst<NVPTXRegClass regclass> :
2131       NVPTXInst<(outs), (ins regclass:$src),
2132       "// Pseudo use of $src",
2133       [(PseudoUseParam regclass:$src)]>;
2134
// One PseudoUseParamInst per integer and float register class.
2135 def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
2136 def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
2137 def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
2138 def PseudoUseParamI8  : PseudoUseParamInst<Int8Regs>;
2139 def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
2140 def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
2141
2142
2143 //
2144 // Load / Store Handling
2145 //
// Scalar load instructions for one destination register class.
// Every variant takes four LdStCode operands ($isVol, $addsp, $Vec, $Sign),
// an i32 immediate $fromWidth, and an address, and prints
//   ld<isVol><addsp><Vec>.<Sign><fromWidth> \t$dst, [<address>];
// The six variants differ only in how the address is given:
//   _avar    - imem:$addr
//   _areg    - Int32Regs:$addr
//   _areg_64 - Int64Regs:$addr
//   _ari     - Int32Regs:$addr plus i32 immediate $offset ([$addr+$offset])
//   _ari_64  - Int64Regs:$addr plus i32 immediate $offset
//   _asi     - imem:$addr plus i32 immediate $offset
// The _areg_64 / _ari_64 strings are split at a different point than the
// others but concatenate to the same text.
// All pattern lists are empty.
// NOTE(review): selection is presumably done in C++ rather than by generated
// patterns, and the ${x:modifier} pieces presumably expand via the LdStCode
// operand printer - neither is visible here, confirm.
2146 multiclass LD<NVPTXRegClass regclass> {
2147   def _avar : NVPTXInst<(outs regclass:$dst),
2148     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2149       i32imm:$fromWidth, imem:$addr),
2150 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2151            "$fromWidth \t$dst, [$addr];"), []>;
2152   def _areg : NVPTXInst<(outs regclass:$dst),
2153     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2154       i32imm:$fromWidth, Int32Regs:$addr),
2155 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2156            "$fromWidth \t$dst, [$addr];"), []>;
2157   def _areg_64 : NVPTXInst<(outs regclass:$dst),
2158     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2159      i32imm:$fromWidth, Int64Regs:$addr),
2160      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
2161                 " \t$dst, [$addr];"), []>;
2162   def _ari : NVPTXInst<(outs regclass:$dst),
2163     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2164       i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2165 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2166            "$fromWidth \t$dst, [$addr+$offset];"), []>;
2167   def _ari_64 : NVPTXInst<(outs regclass:$dst),
2168     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2169      i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2170     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
2171                " \t$dst, [$addr+$offset];"), []>;
2172   def _asi : NVPTXInst<(outs regclass:$dst),
2173     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2174       i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2175 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2176            "$fromWidth \t$dst, [$addr+$offset];"), []>;
2177 }
2178
// Instantiate the LD multiclass once per register class. The enclosing let
// marks every generated instruction as mayLoad with neverHasSideEffects set.
2179 let mayLoad=1, neverHasSideEffects=1 in {
2180 defm LD_i8  : LD<Int8Regs>;
2181 defm LD_i16 : LD<Int16Regs>;
2182 defm LD_i32 : LD<Int32Regs>;
2183 defm LD_i64 : LD<Int64Regs>;
2184 defm LD_f32 : LD<Float32Regs>;
2185 defm LD_f64 : LD<Float64Regs>;
2186 }
2187
// Scalar store instructions for one source register class; the mirror image
// of the LD multiclass. Every variant takes the stored register $src, four
// LdStCode operands ($isVol, $addsp, $Vec, $Sign), an i32 immediate $toWidth
// and an address, and prints
//   st<isVol><addsp><Vec>.<Sign><toWidth> \t[<address>], $src;
// Address forms:
//   _avar    - imem:$addr
//   _areg    - Int32Regs:$addr
//   _areg_64 - Int64Regs:$addr
//   _ari     - Int32Regs:$addr plus i32 immediate $offset
//   _ari_64  - Int64Regs:$addr plus i32 immediate $offset
//   _asi     - imem:$addr plus i32 immediate $offset
// In _areg_64 / _ari_64 the space before the tab sits at the end of the first
// string fragment instead of the start of the second; the concatenated text
// is the same as in the other variants.
// All pattern lists are empty.
2188 multiclass ST<NVPTXRegClass regclass> {
2189   def _avar : NVPTXInst<(outs),
2190     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2191       LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
2192 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2193            " \t[$addr], $src;"), []>;
2194   def _areg : NVPTXInst<(outs),
2195     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2196       LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
2197 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2198            " \t[$addr], $src;"), []>;
2199   def _areg_64 : NVPTXInst<(outs),
2200     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2201      LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
2202   !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
2203                "\t[$addr], $src;"), []>;
2204   def _ari : NVPTXInst<(outs),
2205     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2206       LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
2207 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2208            " \t[$addr+$offset], $src;"), []>;
2209   def _ari_64 : NVPTXInst<(outs),
2210     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2211      LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
2212   !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
2213                "\t[$addr+$offset], $src;"), []>;
2214   def _asi : NVPTXInst<(outs),
2215     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2216       LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
2217 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2218            " \t[$addr+$offset], $src;"), []>;
2219 }
2220
// Instantiate the ST multiclass once per register class. The enclosing let
// marks every generated instruction as mayStore with neverHasSideEffects set.
2221 let mayStore=1, neverHasSideEffects=1 in {
2222 defm ST_i8  : ST<Int8Regs>;
2223 defm ST_i16 : ST<Int16Regs>;
2224 defm ST_i32 : ST<Int32Regs>;
2225 defm ST_i64 : ST<Int64Regs>;
2226 defm ST_f32 : ST<Float32Regs>;
2227 defm ST_f64 : ST<Float64Regs>;
2228 }
2229
2230 // The following are used only in and after vector elementization.
2231 // Vector elementization happens at the machine instruction level, so the
2232 // following instructions
2233 // never appear in the DAG.
// Vector load instructions for one element register class. For each of the
// six address forms used by the scalar LD multiclass (_avar, _areg, _areg_64,
// _ari, _ari_64, _asi) there is a two-result (_v2_*) and a four-result
// (_v4_*) variant. The inputs are the same as for LD (four LdStCode operands,
// i32 immediate $fromWidth, address, optional i32 immediate $offset); the
// outputs are $dst1..$dst2 or $dst1..$dst4, all of class regclass.
// The destination list is written between doubled braces in the asm string:
//   ld<...>.<Sign><fromWidth> \t{{$dst1, $dst2[, $dst3, $dst4]}}, [<address>];
// All pattern lists are empty.
2234 multiclass LD_VEC<NVPTXRegClass regclass> {
2235   def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2236     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2237       i32imm:$fromWidth, imem:$addr),
2238     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2239                "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2240   def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2241     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2242       i32imm:$fromWidth, Int32Regs:$addr),
2243     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2244                "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2245   def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2246     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2247      i32imm:$fromWidth, Int64Regs:$addr),
2248     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2249                "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2250   def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2251     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2252       i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2253     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2254                "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2255   def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2256     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2257      i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2258     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2259                "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2260   def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2261     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2262       i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2263     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2264                "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2265   def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2266       regclass:$dst3, regclass:$dst4),
2267     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2268       i32imm:$fromWidth, imem:$addr),
2269     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2270                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2271   def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2272       regclass:$dst4),
2273     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2274       i32imm:$fromWidth, Int32Regs:$addr),
2275     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2276                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2277   def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2278                                regclass:$dst3, regclass:$dst4),
2279     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2280      i32imm:$fromWidth, Int64Regs:$addr),
2281     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2282                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2283   def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2284       regclass:$dst4),
2285     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2286       i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2287     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2288                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2289                 []>;
2290   def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2291                               regclass:$dst3, regclass:$dst4),
2292     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2293      i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2294     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2295                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2296     []>;
2297   def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2298       regclass:$dst4),
2299     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2300       i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2301     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2302                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2303                 []>;
2304 }
// Instantiate the LD_VEC multiclass once per register class. The enclosing
// let marks every generated instruction as mayLoad with neverHasSideEffects
// set.
2305 let mayLoad=1, neverHasSideEffects=1 in {
2306 defm LDV_i8  : LD_VEC<Int8Regs>;
2307 defm LDV_i16 : LD_VEC<Int16Regs>;
2308 defm LDV_i32 : LD_VEC<Int32Regs>;
2309 defm LDV_i64 : LD_VEC<Int64Regs>;
2310 defm LDV_f32 : LD_VEC<Float32Regs>;
2311 defm LDV_f64 : LD_VEC<Float64Regs>;
2312 }
2313
// Vector store instructions for one element register class; the mirror image
// of LD_VEC. For each of the six address forms (_avar, _areg, _areg_64, _ari,
// _ari_64, _asi) there is a two-source (_v2_*) and a four-source (_v4_*)
// variant. The stored registers $src1..$src2 or $src1..$src4 come first in
// the input list, followed by the four LdStCode operands, the width
// immediate, the address and (for _ari / _ari_64 / _asi) an i32 immediate
// $offset.
// Printed form, with the source list between doubled braces:
//   st<...>.<Sign><fromWidth> \t[<address>], {{$src1, $src2[, $src3, $src4]}};
// Note the width operand is named $fromWidth here, whereas the scalar ST
// multiclass names it $toWidth.
// All pattern lists are empty.
2314 multiclass ST_VEC<NVPTXRegClass regclass> {
2315   def _v2_avar : NVPTXInst<(outs),
2316     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2317       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2318     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2319                "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2320   def _v2_areg : NVPTXInst<(outs),
2321     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2322       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2323     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2324                "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2325   def _v2_areg_64 : NVPTXInst<(outs),
2326     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2327      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
2328     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2329                "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2330   def _v2_ari : NVPTXInst<(outs),
2331     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2332       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
2333       i32imm:$offset),
2334     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2335                "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2336   def _v2_ari_64 : NVPTXInst<(outs),
2337     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2338      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
2339      i32imm:$offset),
2340     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2341                "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2342   def _v2_asi : NVPTXInst<(outs),
2343     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2344       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
2345       i32imm:$offset),
2346     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2347                "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2348   def _v4_avar : NVPTXInst<(outs),
2349     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2350       LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2351       i32imm:$fromWidth, imem:$addr),
2352     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2353                "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2354   def _v4_areg : NVPTXInst<(outs),
2355     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2356       LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2357       i32imm:$fromWidth, Int32Regs:$addr),
2358     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2359                "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2360   def _v4_areg_64 : NVPTXInst<(outs),
2361     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2362      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2363      i32imm:$fromWidth, Int64Regs:$addr),
2364     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2365                "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2366   def _v4_ari : NVPTXInst<(outs),
2367     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2368       LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2369       i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2370     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2371                "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2372     []>;
2373   def _v4_ari_64 : NVPTXInst<(outs),
2374     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2375      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2376      i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2377     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2378                "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2379      []>;
2380   def _v4_asi : NVPTXInst<(outs),
2381     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2382       LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2383       i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2384     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2385                "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2386     []>;
2387 }
2388 let mayStore=1, neverHasSideEffects=1 in {
     // Vector-store instruction families: ST_VEC is instantiated once per
     // register class, and every record defined inside this 'let' gets
     // mayStore=1 and neverHasSideEffects=1.
2389 defm STV_i8  : ST_VEC<Int8Regs>;
2390 defm STV_i16 : ST_VEC<Int16Regs>;
2391 defm STV_i32 : ST_VEC<Int32Regs>;
2392 defm STV_i64 : ST_VEC<Int64Regs>;
2393 defm STV_f32 : ST_VEC<Float32Regs>;
2394 defm STV_f64 : ST_VEC<Float64Regs>;
2395 }
2396
2397
2398 //---- Conversion ----
2399
2400 multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
     // Integer -> floating-point conversions.
     //   OpStr  - signedness letter spliced into the source type of the cvt
     //            mnemonic ("s" for signed, "u" for unsigned).
     //   OpNode - DAG node matched by every pattern in this multiclass.
     // Sources wider than i1 use cvt.rn.  i1 sources use selp to pick the
     // result constant directly from the predicate.
2401 // FIXME: need to add f16 support
2402 //  def CVTf16i8 :
2403 //    NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
2404 //              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
2405 //        [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
2406 //  def CVTf16i16 :
2407 //    NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
2408 //              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
2409 //        [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
2410 //  def CVTf16i32 :
2411 //    NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
2412 //              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
2413 //        [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
2414 //  def CVTf16i64:
2415 //    NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
2416 //          !strconcat(!strconcat("cvt.rn.f16.", OpStr), "64 \t$d, $a;"),
2417 //            [(set Float16Regs:$d, (OpNode Int64Regs:$a))]>;
2418
       // A set i1 is -1 when read as signed (sint_to_fp) and 1 when read as
       // unsigned (uint_to_fp), so the constant selected for a true predicate
       // depends on OpStr: -1.0 for "s", 1.0 otherwise.
2419   def CVTf32i1 :
2420     NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
2421               !strconcat("selp.f32 \t$d, ",
                   !strconcat(!if(!eq(OpStr, "s"), "-1.0", "1.0"),
                              ", 0.0, $a;")),
2422         [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
2423   def CVTf32i8 :
2424     NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
2425               !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
2426         [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
2427   def CVTf32i16 :
2428     NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
2429               !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
2430         [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
2431   def CVTf32i32 :
2432     NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
2433               !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
2434         [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
2435   def CVTf32i64:
2436     NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
2437           !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
2438             [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
2439
       // Same signedness-dependent constant as CVTf32i1, for f64 results.
2440   def CVTf64i1 :
2441     NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
2442               !strconcat("selp.f64 \t$d, ",
                   !strconcat(!if(!eq(OpStr, "s"), "-1.0", "1.0"),
                              ", 0.0, $a;")),
2443         [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
2444   def CVTf64i8 :
2445     NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
2446               !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
2447         [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
2448   def CVTf64i16 :
2449     NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
2450               !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
2451         [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
2452   def CVTf64i32 :
2453     NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
2454               !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
2455         [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
2456   def CVTf64i64:
2457     NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
2458           !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
2459             [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
2460 }
2461
     // Signed and unsigned instantiations of CVT_INT_TO_FP; the first argument
     // supplies the "s"/"u" letter of the cvt source type.
2462 defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
2463 defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
2464
2465 multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
     // Floating-point -> integer conversions, all printed as cvt.rzi.
     //   OpStr  - signedness letter spliced into the destination type of the
     //            cvt mnemonic.
     //   OpNode - DAG node matched by every pattern in this multiclass.
     // Each f32 source also has a _ftz variant (cvt.rzi.ftz) that requires the
     // doF32FTZ predicate.
     // NOTE(review): the Int8Regs results print a 16-bit destination type
     // ("16.f32" / "16.f64"), identical to the Int16Regs results; presumably
     // because Int8Regs are emitted as 16-bit registers -- confirm.
2466 // FIXME: need to add f16 support
2467 //  def CVTi8f16:
2468 //    NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
2469 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
2470 //        [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
2471   def CVTi8f32_ftz:
2472     NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2473               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2474         [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2475   def CVTi8f32:
2476     NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2477               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2478         [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
2479   def CVTi8f64:
2480     NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
2481               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2482         [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
2483
2484 // FIXME: need to add f16 support
2485 //  def CVTi16f16:
2486 //    NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
2487 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
2488 //        [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
2489   def CVTi16f32_ftz:
2490     NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2491               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2492         [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2493   def CVTi16f32:
2494     NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2495               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2496         [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
2497   def CVTi16f64:
2498     NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
2499               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2500         [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;
2501
2502 // FIXME: need to add f16 support
2503 //  def CVTi32f16:
2504 //    NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
2505 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
2506 //        [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
2507   def CVTi32f32_ftz:
2508     NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2509               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
2510         [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2511   def CVTi32f32:
2512     NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2513               !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
2514         [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
2515   def CVTi32f64:
2516     NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
2517               !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
2518         [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;
2519
2520 // FIXME: need to add f16 support
2521 //  def CVTi64f16:
2522 //    NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
2523 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
2524 //        [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
2525   def CVTi64f32_ftz:
2526     NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2527               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
2528         [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2529   def CVTi64f32:
2530     NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2531               !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
2532         [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
2533   def CVTi64f64:
2534     NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
2535               !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
2536         [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
2537 }
2538
     // Signed and unsigned instantiations of CVT_FP_TO_INT; the first argument
     // supplies the "s"/"u" letter of the cvt destination type.
2539 defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
2540 defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
2541
2542 multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
     // Extension of an i1 predicate to a wider integer, matched on OpNode.
     // Each variant prints a selp that chooses between the constants 1 and 0
     // based on $a.  The Int8Regs result uses selp.u16, the same form as the
     // Int16Regs result.
2543   def ext1to8:
2544        NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2545            "selp.u16 \t$d, 1, 0, $a;",
2546      [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2547   def ext1to16:
2548        NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2549            "selp.u16 \t$d, 1, 0, $a;",
2550      [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2551   def ext1to32:
2552        NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2553            "selp.u32 \t$d, 1, 0, $a;",
2554      [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2555   def ext1to64:
2556        NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2557            "selp.u64 \t$d, 1, 0, $a;",
2558      [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
2559 }
2560
2561 multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
     // Signed counterpart of INT_EXTEND_UNSIGNED_1: each variant prints a selp
     // that chooses between the constants -1 and 0 based on $a.  The Int8Regs
     // result uses selp.s16, the same form as the Int16Regs result.
2562   def ext1to8:
2563        NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2564            "selp.s16 \t$d, -1, 0, $a;",
2565      [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2566   def ext1to16:
2567        NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2568            "selp.s16 \t$d, -1, 0, $a;",
2569      [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2570   def ext1to32:
2571        NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2572            "selp.s32 \t$d, -1, 0, $a;",
2573      [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2574   def ext1to64:
2575        NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2576            "selp.s64 \t$d, -1, 0, $a;",
2577      [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
2578 }
2579
2580 multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
     // Integer widening between the i8/i16/i32/i64 register classes, matched
     // on OpNode.  OpStr is used for BOTH the destination and the source type
     // of the cvt, so each mnemonic has the form
     // "cvt.<OpStr><dst bits>.<OpStr><src bits>".
2581   // All Int8Regs are emitted as 16bit registers in ptx.
2582   // And there is no selp.u8 in ptx.
2583   def ext8to16:
2584        NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
2585            !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
2586              !strconcat(OpStr, "8 \t$d, $a;")))),
2587      [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
2588   def ext8to32:
2589        NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
2590            !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2591              !strconcat(OpStr, "8 \t$d, $a;")))),
2592      [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
2593   def ext8to64:
2594        NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
2595            !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2596              !strconcat(OpStr, "8 \t$d, $a;")))),
2597      [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
2598   def ext16to32:
2599        NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
2600            !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2601              !strconcat(OpStr, "16 \t$d, $a;")))),
2602      [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
2603   def ext16to64:
2604        NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
2605            !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2606              !strconcat(OpStr, "16 \t$d, $a;")))),
2607      [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
2608   def ext32to64:
2609        NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
2610            !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2611              !strconcat(OpStr, "32 \t$d, $a;")))),
2612      [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
2613 }
2614
     // i1 sources: sext uses the signed (-1/0) selp forms; zext and anyext both
     // use the unsigned (1/0) selp forms.
2615 defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
2616 defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
2617 defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
2618
     // Wider sources: sext uses the "s" cvt forms; zext and anyext both use the
     // "u" cvt forms.
2619 defm Sint_extend : INT_EXTEND <"s", sext>;
2620 defm Zint_extend : INT_EXTEND <"u", zext>;
2621 defm Aint_extend : INT_EXTEND <"u", anyext>;
2622
2623 class TRUNC_to1_asm<string sz> {
     // Builds, in field 's', the asm text used to truncate an integer to i1.
     //   sz - type suffix including its leading dot (the users pass ".b16",
     //        ".b32" or ".b64"); it is appended to ".reg ", "and" and "setp".
     // The text declares a scratch register 'temp' inside a "{{ ... }}" pair,
     // ANDs $a with 1 into it, and sets $d from comparing temp with 1.
     // NOTE(review): this prints the type before the comparison operator
     // (e.g. "setp.b32.eq"), whereas the PTX ISA documents the form
     // setp.CmpOp.type -- confirm the assembler accepts this order.
2624   string s = !strconcat("{{\n\t",
2625              !strconcat(".reg ",
2626              !strconcat(sz,
2627              !strconcat(" temp;\n\t",
2628              !strconcat("and",
2629              !strconcat(sz,
2630              !strconcat("\t temp, $a, 1;\n\t",
2631              !strconcat("setp",
2632              !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
2633 }
2634
     // Integer truncation patterns.
     // Results wider than i1 are printed as a cvt between unsigned types
     // ("cvt.u<dst>.u<src>").  i1 results use the text built by TRUNC_to1_asm;
     // the Int8Regs source passes ".b16", the same suffix as the Int16Regs
     // source.
2635 def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2636              "cvt.u32.u64 \t$d, $a;",
2637        [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
2638 def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
2639              "cvt.u16.u64 \t$d, $a;",
2640        [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
2641 def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
2642              "cvt.u8.u64 \t$d, $a;",
2643        [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
2644 def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
2645              "cvt.u16.u32 \t$d, $a;",
2646        [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
2647 def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
2648              "cvt.u8.u32 \t$d, $a;",
2649        [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
2650 def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
2651              "cvt.u8.u16 \t$d, $a;",
2652        [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
2653 def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2654              TRUNC_to1_asm<".b64">.s,
2655              [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
2656 def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2657              TRUNC_to1_asm<".b32">.s,
2658              [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
2659 def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
2660              TRUNC_to1_asm<".b16">.s,
2661              [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
2662 def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
2663              TRUNC_to1_asm<".b16">.s,
2664              [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
2665
2666 // Select instructions
     // A select whose condition is held in an Int32Regs value is rewritten to
     // the SELECT*rr instruction for the value type, with the condition first
     // converted to a predicate by TRUNC_32to1.  One pattern per value
     // register class.
2667 def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
2668           (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
2669 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
2670           (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
2671             (TRUNC_32to1 Int32Regs:$pred))>;
2672 def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
2673           (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
2674             (TRUNC_32to1 Int32Regs:$pred))>;
2675 def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
2676           (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
2677             (TRUNC_32to1 Int32Regs:$pred))>;
2678 def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
2679           (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
2680             (TRUNC_32to1 Int32Regs:$pred))>;
2681 def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
2682           (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
2683             (TRUNC_32to1 Int32Regs:$pred))>;
2684
     // Bit-preserving move between two register classes, matched on
     // 'bitconvert'.
     //   SzStr       - bit width appended to "mov.b" (e.g. "32" -> "mov.b32").
     //   regclassIn  - register class of the source operand $a.
     //   regclassOut - register class of the result $d.
2685 class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
2686   NVPTXRegClass regclassOut> :
2687            NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
2688            !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
2689      [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
2690
     // int <-> float bitconverts at 32 and 64 bits, in both directions.
2691 def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
2692 def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
2693 def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
2694 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2695
2696 // pack a set of smaller int registers to a larger int register
     // Each instruction prints a vector-style mov ("mov.bNN $d, {...}") whose
     // source list is the smaller registers.  All of them have an empty
     // pattern list.  The two Int8Regs variants first copy every source into
     // a .b8 scratch register (%t0..) declared inside the emitted block and
     // pack those scratch registers instead of the sources themselves.
2697 def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
2698                           (ins Int8Regs:$s1, Int8Regs:$s2,
2699                                Int8Regs:$s3, Int8Regs:$s4),
2700                           !strconcat("{{\n\t.reg .b8\t%t<4>;",
2701                           !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2702                           !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2703                           !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
2704                           !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
2705                            "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
2706                           []>;
2707 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
2708                           (ins Int16Regs:$s1, Int16Regs:$s2,
2709                                Int16Regs:$s3, Int16Regs:$s4),
2710                           "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
2711                           []>;
2712 def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
2713                           (ins Int8Regs:$s1, Int8Regs:$s2),
2714                           !strconcat("{{\n\t.reg .b8\t%t<2>;",
2715                           !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2716                           !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2717                                      "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
2718                           []>;
2719 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
2720                           (ins Int16Regs:$s1, Int16Regs:$s2),
2721                           "mov.b32\t$d, {{$s1, $s2}};",
2722                           []>;
2723 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
2724                           (ins Int32Regs:$s1, Int32Regs:$s2),
2725                           "mov.b64\t$d, {{$s1, $s2}};",
2726                           []>;
2727 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
2728                           (ins Float32Regs:$s1, Float32Regs:$s2),
2729                           "mov.b64\t$d, {{$s1, $s2}};",
2730                           []>;
2731
2732 // unpack a larger int register to a set of smaller int registers
     // Mirror image of the pack instructions: each prints a vector-style mov
     // ("mov.bNN {...}, $s") whose destination list is the smaller registers.
     // All of them have an empty pattern list.  The two Int8Regs variants
     // unpack into .b8 scratch registers (%t0..) declared inside the emitted
     // block and then copy each scratch register to its result.
2733 def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
2734                                 Int8Regs:$d3, Int8Regs:$d4),
2735                           (ins Int32Regs:$s),
2736                           !strconcat("{{\n\t.reg .b8\t%t<4>;",
2737                           !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
2738                           !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2739                           !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
2740                           !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
2741                                      "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
2742                           []>;
2743 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
2744                                  Int16Regs:$d3, Int16Regs:$d4),
2745                            (ins Int64Regs:$s),
2746                            "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
2747                           []>;
2748 def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
2749                           (ins Int16Regs:$s),
2750                           !strconcat("{{\n\t.reg .b8\t%t<2>;",
2751                           !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
2752                           !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2753                                      "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
2754                           []>;
2755 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
2756                            (ins Int32Regs:$s),
2757                            "mov.b32\t{{$d1, $d2}}, $s;",
2758                           []>;
2759 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
2760                            (ins Int64Regs:$s),
2761                            "mov.b64\t{{$d1, $d2}}, $s;",
2762                           []>;
2763 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
2764                            (ins Float64Regs:$s),
2765                            "mov.b64\t{{$d1, $d2}}, $s;",
2766                           []>;
2767
     // f64 -> f32 rounding (fround, printed as cvt.rn) and f32 -> f64
     // extension (fextend, printed as a plain cvt).  Each has an additional
     // _ftz form that adds the .ftz modifier and requires the doF32FTZ
     // predicate.
2768 def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2769             "cvt.rn.ftz.f32.f64 \t$d, $a;",
2770       [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
2771
2772 def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2773             "cvt.rn.f32.f64 \t$d, $a;",
2774       [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
2775
2776 def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2777             "cvt.ftz.f64.f32 \t$d, $a;",
2778       [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2779
2780 def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2781             "cvt.f64.f32 \t$d, $a;",
2782       [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
2783
     // DAG node for NVPTXISD::RET_FLAG: no typed operands or results
     // (SDTNone), carries a chain and an optional incoming glue.  It is the
     // pattern matched by the Return instruction.
2784 def retflag       : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
2785                            [SDNPHasChain, SDNPOptInGlue]>;
2786
2787 //-----------------------------------
2788 // Control-flow
2789 //-----------------------------------
2790
2791 let isTerminator=1 in {
     // Every instruction in this block is marked isTerminator=1.
     //   Return       - prints "ret;", matches retflag; also isReturn/isBarrier.
     //   CBranch      - prints "@$a bra", i.e. a branch guarded by predicate
     //                  $a; matches brcond on an Int1Regs condition.
     //   CBranchOther - prints "@!$a bra", the negated guard; it has no
     //                  pattern of its own in this block.
     //   GOTO         - prints "bra.uni", matches the unconditional 'br';
     //                  also isBarrier.
2792    let isReturn=1, isBarrier=1 in
2793       def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
2794
2795    let isBranch=1 in
2796       def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2797                           "@$a bra \t$target;",
2798                            [(brcond Int1Regs:$a, bb:$target)]>;
2799    let isBranch=1 in
2800       def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2801                           "@!$a bra \t$target;",
2802                            []>;
2803
2804    let isBranch=1, isBarrier=1 in
2805       def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
2806                         "bra.uni \t$target;",
2807                   [(br bb:$target)]>;
2808 }
2809
     // brcond on an Int32Regs condition: the condition and the constant 0 are
     // fed to ISetUNEi32ri_p, and its result is used as the predicate operand
     // of CBranch.
     // NOTE(review): ISetUNEi32ri_p is defined elsewhere; presumably it is a
     // "not equal" compare producing a predicate -- confirm.
2810 def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
2811     (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
2812
2813 // SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
2814 // conditional branch if
2815 // the target block is the next block so that the code can fall through to the
2816 // target block.
2817 // The inversion is done by 'xor condition, 1', which will be translated to
2818 // (setne condition, -1).
2819 // Since ptx supports '@!pred bra target', we should use it.
     // The pattern below therefore maps a brcond on (setne $a, -1) straight
     // to CBranchOther, which prints the negated guard on the original $a.
2820 def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
2821   (CBranchOther Int1Regs:$a, bb:$target)>;
2822
2823 // Call
     // Type profiles for the call-sequence markers: callseq_start constrains
     // operand 0 to i32; callseq_end constrains operands 0 and 1 to i32.
2824 def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
2825 def SDT_NVPTXCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
2826                                         SDTCisVT<1, i32> ]>;
2827
     // DAG nodes for ISD::CALLSEQ_START / ISD::CALLSEQ_END.  Both are chained,
     // produce glue and are flagged as having side effects; callseq_end can
     // additionally consume glue.
2828 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
2829                            [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
2830 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_NVPTXCallSeqEnd,
2831                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
2832                            SDNPSideEffect]>;
2833
     // NVPTXISD::CALL: no results, one i32 operand (the callee); chained, with
     // optional incoming glue and outgoing glue.
2834 def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
2835 def call          : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
2836                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
     // Operand type for the callee of CALL.
2837 def calltarget : Operand<i32>;
     // CALL has no pattern of its own; it is selected through the two Pat
     // records below, for global-address and external-symbol callees.
2838 let isCall=1 in {
2839    def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
2840                   "call \t$dst, (1);", []>;
2841 }
2842
2843 def : Pat<(call tglobaladdr:$dst),
2844           (CALL tglobaladdr:$dst)>;
2845 def : Pat<(call texternalsym:$dst),
2846           (CALL texternalsym:$dst)>;
2847
2848 // Pseudo instructions.
     // Pseudo forwards its four arguments unchanged to NVPTXInst and adds
     // nothing of its own.
2849 class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
2850    : NVPTXInst<outs, ins, asmstr, pattern>;
2851
2852 // @TODO: We use some tricks here to emit curly braces.  Can we clean this up
2853 // a bit without TableGen modifications?
     // Callseq_Start matches callseq_start and Callseq_End matches callseq_end;
     // both take their amounts as immediates and print them only inside PTX
     // '//' comments.  Callseq_Start also declares the .b32 register
     // temp_param_reg.
     // NOTE(review): each string keeps its "{{" / "}}" balanced, apparently by
     // placing one of the pair after a PTX "//" comment marker, so that the
     // start text effectively opens a brace scope and the end text closes it
     // -- confirm against the printed PTX.
2854 def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
2855   "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
2856                                [(callseq_start timm:$amt)]>;
2857 def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2858   "\n\t//{{\n\t}}// Callseq End $amt1",
2859                             [(callseq_end timm:$amt1, timm:$amt2)]>;
2860
2861 // trap instruction
     // No operands; prints "trap;" and matches the 'trap' DAG node.
2862
2863 def trapinst : NVPTXInst<(outs), (ins),
2864                          "trap;",
2865                          [(trap)]>;
2866
2867 include "NVPTXIntrinsics.td"
2868
2869
2870 //-----------------------------------
2871 // Notes
2872 //-----------------------------------
2873 // BSWAP is currently expanded. The following would be more efficient:
2874 // - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit
2875 //   registers
2876 // - for sm_20, use prmt (use vector scalar mov to do the pack and unpack).
2877 //   sm_20 supports native 32-bit registers, but not native 16-bit registers.