//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//
class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}
def TruePredicate : Predicate<"true">;

// Exists to help track down where SubtargetPredicate isn't set rather
// than letting tablegen crash with an unhelpful error.
def InvalidPred : Predicate<"predicate not set on instruction or pattern">;
class PredicateControl {
  Predicate SubtargetPredicate = InvalidPred;
  list<Predicate> AssemblerPredicates = [];
  Predicate AssemblerPredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = !listconcat([SubtargetPredicate,
                                            AssemblerPredicate],
                                           AssemblerPredicates,
                                           OtherPredicates);
}

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl;
def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">;
def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
def FMA : Predicate<"Subtarget->hasFMA()">;
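
// Illustrative sketch (not part of this file): anything deriving from
// PredicateControl, such as AMDGPUPat below, can be gated on one of these
// predicates. The opcode name here is hypothetical.
//
//   let OtherPredicates = [NoFP32Denormals] in
//   def : AMDGPUPat<(fadd f32:$a, f32:$b), (HYPOTHETICAL_ADD_F32 $a, $b)>;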
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
  let RenderMethod = "addImmOperands";
}
let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

def s16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"
//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;
//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]
>;
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;
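
// Illustrative sketch (not part of this file): the *_oneuse fragments match
// the same node as the plain operator, but only when it has a single use, so
// folding it into a combined instruction does not duplicate the computation.
// The fused opcode name below is hypothetical.
//
//   def : AMDGPUPat<
//     (add i32:$z, (shl_oneuse i32:$x, i32:$y)),
//     (HYPOTHETICAL_SHL_ADD_U32 $x, $y, $z)
//   >;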
def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;

def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;

  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;

  return false;
}]>;
//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//
def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_ONE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;

def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//
def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;
//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//
def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;
//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//
def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;
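
// Illustrative sketch (not part of this file): these leaves match the
// condition operand of a setcc/selectcc node, so a single target pattern can
// cover both the ordered and the plain form of a comparison. The compare
// opcode name below is hypothetical.
//
//   def : AMDGPUPat<
//     (i1 (setcc f32:$src0, f32:$src1, COND_OEQ)),
//     (HYPOTHETICAL_CMP_EQ_F32 $src0, $src1)
//   >;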
//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//
def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

class Aligned16Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() >= 16;
}]>;
class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;

class StoreFrag<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

class StoreHi16<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
>;
class PrivateAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
}]>;

class ConstantAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
}]>;

class LocalAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

class GlobalAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;

class GlobalLoadAddress : CodePatPred<[{
  auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;

class FlatLoadAddress : CodePatPred<[{
  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.FLAT_ADDRESS ||
         AS == AMDGPUASI.GLOBAL_ADDRESS ||
         AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;

class FlatStoreAddress : CodePatPred<[{
  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.FLAT_ADDRESS ||
         AS == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
                                                (ld_node node:$ptr), [{
  LoadSDNode *L = cast<LoadSDNode>(N);
  return L->getExtensionType() == ISD::ZEXTLOAD ||
         L->getExtensionType() == ISD::EXTLOAD;
}]>;

def az_extload : AZExtLoadBase <unindexedload>;

def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
class PrivateLoad <SDPatternOperator op> : LoadFrag <op>, PrivateAddress;
class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;

class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;

class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalLoadAddress;
class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;

class FlatLoad <SDPatternOperator op> : LoadFrag <op>, FlatLoadAddress;
class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;

class ConstantLoad <SDPatternOperator op> : LoadFrag <op>, ConstantAddress;
def load_private : PrivateLoad <load>;
def az_extloadi8_private : PrivateLoad <az_extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;
def az_extloadi16_private : PrivateLoad <az_extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;

def store_private : PrivateStore <store>;
def truncstorei8_private : PrivateStore <truncstorei8>;
def truncstorei16_private : PrivateStore <truncstorei16>;
def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;

def load_global : GlobalLoad <load>;
def sextloadi8_global : GlobalLoad <sextloadi8>;
def az_extloadi8_global : GlobalLoad <az_extloadi8>;
def sextloadi16_global : GlobalLoad <sextloadi16>;
def az_extloadi16_global : GlobalLoad <az_extloadi16>;
def atomic_load_global : GlobalLoad<atomic_load>;

def store_global : GlobalStore <store>;
def truncstorei8_global : GlobalStore <truncstorei8>;
def truncstorei16_global : GlobalStore <truncstorei16>;
def store_atomic_global : GlobalStore<atomic_store>;
def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;

def load_local : LocalLoad <load>;
def az_extloadi8_local : LocalLoad <az_extloadi8>;
def sextloadi8_local : LocalLoad <sextloadi8>;
def az_extloadi16_local : LocalLoad <az_extloadi16>;
def sextloadi16_local : LocalLoad <sextloadi16>;
def atomic_load_32_local : LocalLoad<atomic_load_32>;
def atomic_load_64_local : LocalLoad<atomic_load_64>;

def store_local : LocalStore <store>;
def truncstorei8_local : LocalStore <truncstorei8>;
def truncstorei16_local : LocalStore <truncstorei16>;
def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
def atomic_store_local : LocalStore <atomic_store>;
def load_align8_local : Aligned8Bytes <
  (ops node:$ptr), (load_local node:$ptr)
>;

def load_align16_local : Aligned16Bytes <
  (ops node:$ptr), (load_local node:$ptr)
>;

def store_align8_local : Aligned8Bytes <
  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;

def store_align16_local : Aligned16Bytes <
  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;
def load_flat : FlatLoad <load>;
def az_extloadi8_flat : FlatLoad <az_extloadi8>;
def sextloadi8_flat : FlatLoad <sextloadi8>;
def az_extloadi16_flat : FlatLoad <az_extloadi16>;
def sextloadi16_flat : FlatLoad <sextloadi16>;
def atomic_load_flat : FlatLoad<atomic_load>;

def store_flat : FlatStore <store>;
def truncstorei8_flat : FlatStore <truncstorei8>;
def truncstorei16_flat : FlatStore <truncstorei16>;
def atomic_store_flat : FlatStore <atomic_store>;
def truncstorei8_hi16_flat : StoreHi16<truncstorei8>, FlatStoreAddress;
def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;

def constant_load : ConstantLoad<load>;
def sextloadi8_constant : ConstantLoad <sextloadi8>;
def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
def sextloadi16_constant : ConstantLoad <sextloadi16>;
def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                            (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;

class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
  (ops node:$ptr, node:$cmp, node:$swap),
  (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
  AtomicSDNode *AN = cast<AtomicSDNode>(N);
  return AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
multiclass global_binary_atomic_op<SDNode atomic_op> {
  def "" : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;

  def _noret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

  def _ret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
defm atomic_add_global : global_binary_atomic_op<atomic_load_add>;
defm atomic_and_global : global_binary_atomic_op<atomic_load_and>;
defm atomic_max_global : global_binary_atomic_op<atomic_load_max>;
defm atomic_min_global : global_binary_atomic_op<atomic_load_min>;
defm atomic_or_global : global_binary_atomic_op<atomic_load_or>;
defm atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
defm atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
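
// Illustrative sketch (not part of this file): each defm above produces three
// fragments, e.g. atomic_add_global, atomic_add_global_noret, and
// atomic_add_global_ret, so a target can pick a cheaper encoding when the
// atomic's result is unused. The opcode name below is hypothetical.
//
//   def : AMDGPUPat<
//     (atomic_add_global_noret i64:$ptr, i32:$val),
//     (HYPOTHETICAL_ATOMIC_ADD_NORET $ptr, $val)
//   >;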
def AMDGPUatomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress;

def atomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress;

def atomic_cmp_swap_global_noret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

def atomic_cmp_swap_global_ret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int V2FP16_ONE = 0x3C003C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;
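
// Illustrative sketch (not part of this file): the encodings above are meant
// to be used as immediate operands in output patterns, e.g. scaling by
// 1/(2*pi) before a hardware trig opcode. The opcode names below are
// hypothetical.
//
//   def : AMDGPUPat<
//     (fsin f32:$src),
//     (HYPOTHETICAL_SIN_F32
//       (HYPOTHETICAL_MUL_F32 (HYPOTHETICAL_MOV_IMM CONST.TWO_PI_INV), $src))
//   >;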
def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;
/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;
/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
> {
  let SubtargetPredicate = TruePredicate;
}

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
> {
  let SubtargetPredicate = TruePredicate;
}
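
// Illustrative sketch (not part of this file): targets instantiate these once
// per vector type and lane, mapping each lane to a subregister index, e.g.:
//
//   def : Extract_Element <f32, v2f32, 0, sub0>;
//   def : Insert_Element <f32, v2f32, 1, sub1>;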
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;
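
// Illustrative sketch (not part of this file): a no-op bitcast between
// same-size types stays in the same register class, e.g.:
//
//   def : BitConvert <i32, f32, VGPR_32>;
//   def : BitConvert <v2i32, i64, VReg_64>;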
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// BFI_INT patterns
multiclass BFIPatterns <Instruction BFI_INT,
                        Instruction LoadImm32,
                        RegisterClass RC64> {
  // Definition from ISA doc:
  // (y & x) | (z & ~x)
  def : AMDGPUPat <
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  // 64-bit version
  def : AMDGPUPat <
    (or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
    (REG_SEQUENCE RC64,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0)),
               (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1)),
               (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
  >;

  // SHA-256 Ch function
  // z ^ (x & (y ^ z))
  def : AMDGPUPat <
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;

  // 64-bit version
  def : AMDGPUPat <
    (xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
    (REG_SEQUENCE RC64,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0)),
               (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1)),
               (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
  >;

  def : AMDGPUPat <
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
  >;

  def : AMDGPUPat <
    (f32 (fcopysign f32:$src0, f64:$src1)),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
             (i32 (EXTRACT_SUBREG $src1, sub1)))
  >;

  def : AMDGPUPat <
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;

  def : AMDGPUPat <
    (f64 (fcopysign f64:$src0, f32:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               $src1), sub1)
  >;
}
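
// Illustrative usage sketch (not part of this file): a target instantiates
// the multiclass with its bitfield-insert opcode, a 32-bit immediate move,
// and its 64-bit register class, e.g.:
//
//   defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;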
// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass RC64> {
  def : AMDGPUPat <
    (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
    (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
  >;

  def : AMDGPUPat <
    (or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
    (REG_SEQUENCE RC64,
      (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub0)),
                    (i32 (EXTRACT_SUBREG $y, sub0))),
               (i32 (EXTRACT_SUBREG $z, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0))), sub0,
      (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub1)),
                    (i32 (EXTRACT_SUBREG $y, sub1))),
               (i32 (EXTRACT_SUBREG $z, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1))), sub1)
  >;
}
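
// Illustrative usage sketch (not part of this file), mirroring the BFI case:
//
//   defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;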
// Bitfield extract patterns

def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
  return isMask_32(N->getZExtValue());
}]>;

def IMMPopCount : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
                                   MVT::i32);
}]>;
multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
  def : AMDGPUPat <
    (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
    (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
  >;

  // x & ((1 << y) - 1)
  def : AMDGPUPat <
    (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x & ~(-1 << y)
  def : AMDGPUPat <
    (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x & (-1 >> (bitwidth - y))
  def : AMDGPUPat <
    (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x << (bitwidth - y) >> (bitwidth - y)
  def : AMDGPUPat <
    (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  def : AMDGPUPat <
    (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (SBFE $src, (MOV (i32 0)), $width)
  >;
}
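
// Illustrative usage sketch (not part of this file):
//
//   defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;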
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;
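
// Illustrative usage sketch (not part of this file): rotate-right selects to
// a bit-align opcode with the source repeated, e.g.:
//
//   def : ROTRPattern <V_ALIGNBIT_B32>;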
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class IntMed3Pat<Instruction med3Inst,
                 SDPatternOperator max,
                 SDPatternOperator max_oneuse,
                 SDPatternOperator min_oneuse,
                 ValueType vt = i32> : AMDGPUPat<
  (max (min_oneuse vt:$src0, vt:$src1),
       (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
  (med3Inst $src0, $src1, $src2)
>;
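
// Illustrative usage sketch (not part of this file): instantiated once for
// the signed and once for the unsigned median-of-3 opcode, e.g.:
//
//   def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
//   def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;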
// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
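
// Illustrative usage sketch (not part of this file): HasClamp selects the
// instruction form that takes a trailing clamp operand, e.g.:
//
//   def : IMad24Pat<V_MAD_I32_I24, 1>;
//   def : UMad24Pat<V_MAD_U32_U24, 1>;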
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;
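
// Illustrative usage sketch (not part of this file): 1.0/x and rcp(sqrt(x))
// select to the hardware reciprocal opcodes, e.g.:
//
//   def : RcpPat<V_RCP_F32_e32, f32>;
//   def : RsqPat<V_RSQ_F32_e32, f32>;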