//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//
class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
}

def AddrSpaces : AddressSpacesImpl;
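// The numeric values above mirror the AMDGPUAS address-space enumeration;
// they are consumed by the address-space lists further below, e.g.
//   def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;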
class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}
//===---------------------------------------------------------------------===//
// Instruction formats
//===---------------------------------------------------------------------===//
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
  : Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}
def TruePredicate : Predicate<"true">;

class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  list<Predicate> AssemblerPredicates = [];
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = !listconcat([SubtargetPredicate,
                                            AssemblerPredicate,
                                            WaveSizePredicate],
                                           AssemblerPredicates,
                                           OtherPredicates);
}

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl;
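// A minimal usage sketch (values illustrative): a pattern can override the
// individual predicate slots and the !listconcat above assembles the final
// Predicates list, e.g.
//   def : AMDGPUPat<(...), (...)> {
//     let SubtargetPredicate = FMA;
//     let OtherPredicates = [UnsafeFPMath];
//   }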
def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">;
def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
def FMA : Predicate<"Subtarget->hasFMA()">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
  let RenderMethod = "addImmOperands";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

def s16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"
//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;
//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]
>;

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]
>;
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;

def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
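// The *_oneuse fragments only match when the interior node has a single
// user. A short worked example of why this matters: IntMed3Pat below folds
// a min/max tree into one med3 instruction, but if the inner min had other
// users it would still have to be emitted separately, so the "fold" would
// add an instruction rather than remove one.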
def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;

def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;

def hi_f16_elt : PatLeaf<
  (f16 (bitconvert i16:$src)), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;
//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_ONE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;

def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
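// These leaves match the condition-code operand of setcc / select_cc style
// nodes; a sketch of their use in a pattern:
//   (selectcc f32:$lhs, f32:$rhs, f32:$tval, f32:$fval, COND_OEQ)
// matches only ordered or plain equality compares (the exact node and
// operand order depend on the using pattern).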
//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;

// XXX - For some reason the R600 version prefers to use unordered
// for not-equal comparisons.
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;
//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;
//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

class Aligned16Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() >= 16;
}]>;

class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;

class StoreFrag<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

class StoreHi16<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
>;
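// StoreHi16 matches a store of the high half of a 32-bit value, i.e.
// (op (srl $value, 16), $ptr); the *_hi16 fragments defined further down
// use it to select the d16-hi style store instructions.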
def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global, AddrSpaces.Constant ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
class GlobalLoadAddress : CodePatPred<[{
  auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;

class FlatLoadAddress : CodePatPred<[{
  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUAS::FLAT_ADDRESS ||
         AS == AMDGPUAS::GLOBAL_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;

class GlobalAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

class PrivateAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;

class LocalAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

class RegionAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
}]>;

class FlatStoreAddress : CodePatPred<[{
  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUAS::FLAT_ADDRESS ||
         AS == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
// TODO: Remove these when stores are converted to the new PatFrag format.
class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;
class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
class RegionStore <SDPatternOperator op> : StoreFrag <op>, RegionAddress;
class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def extloadi8_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
}

def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
}

defm atomic_store_#as : binary_atomic_op<atomic_store>;

} // End let AddressSpaces = ...
} // End foreach AddrSpace
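// The foreach above stamps out one fragment per address space, e.g.
// load_global, sextloadi8_private, zextloadi16_flat, atomic_load_32_local,
// and store_flat; each carries the matching AddrSpaces list from the
// AddressSpaceList defs above the loop.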
def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;

def store_atomic_global : GlobalStore<atomic_store>;
def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;

def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
def atomic_store_local : LocalStore <atomic_store>;
def load_align8_local : Aligned8Bytes <
  (ops node:$ptr), (load_local node:$ptr)
>;

def load_align16_local : Aligned16Bytes <
  (ops node:$ptr), (load_local node:$ptr)
>;

def store_align8_local : Aligned8Bytes <
  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;

def store_align16_local : Aligned16Bytes <
  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;

def atomic_store_flat : FlatStore <atomic_store>;
def truncstorei8_hi16_flat : StoreHi16<truncstorei8>, FlatStoreAddress;
def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;
class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

class region_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
}]>;

def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                           (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
  (ops node:$ptr, node:$cmp, node:$swap),
  (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
  AtomicSDNode *AN = cast<AtomicSDNode>(N);
  return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

class AtomicCmpSwapRegion <SDNode cmp_swap_node> : PatFrag<
  (ops node:$ptr, node:$cmp, node:$swap),
  (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
  AtomicSDNode *AN = cast<AtomicSDNode>(N);
  return AN->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
}]>;

def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;

multiclass global_binary_atomic_op<SDNode atomic_op> {
  def "" : global_binary_atomic_op_frag<atomic_op>;

  def _noret : PatFrag<
    (ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value),
    [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

  def _ret : PatFrag<
    (ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value),
    [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
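// The ""/_noret/_ret triple keys on whether the atomic's old value is
// consumed: SDValue(N, 0).use_empty() means the result is dead, letting the
// selector pick an encoding that does not return the pre-op value.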
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
defm atomic_add_global : global_binary_atomic_op<atomic_load_add>;
defm atomic_and_global : global_binary_atomic_op<atomic_load_and>;
defm atomic_max_global : global_binary_atomic_op<atomic_load_max>;
defm atomic_min_global : global_binary_atomic_op<atomic_load_min>;
defm atomic_or_global : global_binary_atomic_op<atomic_load_or>;
defm atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
defm atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;

def AMDGPUatomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress;

def atomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress;

def atomic_cmp_swap_global_noret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

def atomic_cmp_swap_global_ret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;
def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;
/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;
multiclass BFIPatterns <Instruction BFI_INT,
                        Instruction LoadImm32,
                        RegisterClass RC64> {
  // Definition from ISA doc:
  // (y & x) | (z & ~x)
  def : AMDGPUPat <
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  def : AMDGPUPat <
    (or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
    (REG_SEQUENCE RC64,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0)),
               (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1)),
               (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
  >;

  // SHA-256 Ch function
  // z ^ (x & (y ^ z))
  def : AMDGPUPat <
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;

  def : AMDGPUPat <
    (xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
    (REG_SEQUENCE RC64,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0)),
               (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1)),
               (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
  >;

  def : AMDGPUPat <
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
  >;

  def : AMDGPUPat <
    (f32 (fcopysign f32:$src0, f64:$src1)),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
             (i32 (EXTRACT_SUBREG $src1, sub1)))
  >;

  def : AMDGPUPat <
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;

  def : AMDGPUPat <
    (f64 (fcopysign f64:$src0, f32:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               $src1), sub1)
  >;
}
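// A worked instance of the fcopysign patterns above: BFI_INT computes
// (y & x) | (z & ~x) with $x as the mask, so with mask 0x7fffffff it keeps
// the exponent and mantissa bits of $src0 and takes the sign bit from
// $src1, which is exactly copysign for IEEE floats.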
// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass RC64> {
  def : AMDGPUPat <
    (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
    (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
  >;

  def : AMDGPUPat <
    (or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
    (REG_SEQUENCE RC64,
      (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub0)),
                    (i32 (EXTRACT_SUBREG $y, sub0))),
               (i32 (EXTRACT_SUBREG $z, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0))), sub0,
      (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub1)),
                    (i32 (EXTRACT_SUBREG $y, sub1))),
               (i32 (EXTRACT_SUBREG $z, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1))), sub1)
  >;
}
// Bitfield extract patterns

def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
  return isMask_32(N->getZExtValue());
}]>;

def IMMPopCount : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
                                   MVT::i32);
}]>;
multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
  def : AMDGPUPat <
    (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
    (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
  >;

  // x & ((1 << y) - 1)
  def : AMDGPUPat <
    (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x & ~(-1 << y)
  def : AMDGPUPat <
    (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x & (-1 >> (bitwidth - y))
  def : AMDGPUPat <
    (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x << (bitwidth - y) >> (bitwidth - y)
  def : AMDGPUPat <
    (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  def : AMDGPUPat <
    (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (SBFE $src, (MOV (i32 0)), $width)
  >;
}
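// Worked example for the first pattern above: (x >> 3) & 0x1f uses the
// zero-based mask 0x1f (isMask_32 is true, popcount is 5), so it selects
// to UBFE $x, 3, (MOV 5): extract 5 bits starting at bit 3.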
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;
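// BIT_ALIGN (a v_alignbit_b32 style instruction) returns 32 bits of the
// 64-bit value {src0, src1} shifted right by src2; passing the same
// register for both halves makes that a rotate right, hence the repeated
// $src0 above.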
multiclass IntMed3Pat<Instruction med3Inst,
                      SDPatternOperator min,
                      SDPatternOperator max,
                      SDPatternOperator min_oneuse,
                      SDPatternOperator max_oneuse,
                      ValueType vt = i32> {

  // This matches 16 permutations of
  // min(max(a, b), max(min(a, b), c))
  def : AMDGPUPat <
    (min (max_oneuse vt:$src0, vt:$src1),
         (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
    (med3Inst vt:$src0, vt:$src1, vt:$src2)
  >;

  // This matches 16 permutations of
  // max(min(x, y), min(max(x, y), z))
  def : AMDGPUPat <
    (max (min_oneuse vt:$src0, vt:$src1),
         (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
    (med3Inst $src0, $src1, $src2)
  >;
}
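// The 16 permutations come from commutativity: each of the four min/max
// nodes can have its operands swapped (2^4 orderings), and every one
// reduces to med3(a, b, c), the middle of the three values.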
// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
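// These back the V_CVT_RPI_I32_F32 / V_CVT_FLR_I32_F32 style conversions:
// rpi computes (int)floor(x + 0.5) and flr computes (int)floor(x). Both
// fragments refuse to match unless NoNaNsFPMath is set, since the fused
// hardware conversion and the expanded DAG may treat NaN inputs
// differently.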
let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;
// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;
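// fminnum and fminnum_ieee differ only in their signaling-NaN semantics;
// since both ultimately select to the same v_min_f* encoding, the PatFrags
// above let a single pattern match either node.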