contrib/llvm/lib/Target/AMDGPU/SIInstructions.td

   1 //===-- SIInstructions.td - SI Instruction Definitions --------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 // This file was originally auto-generated from a GPU register header file and
  10 // all the instruction definitions were originally commented out.  Instructions
  11 // that are not yet supported remain commented out.
  12 //===----------------------------------------------------------------------===//
  13
  14 def isGCN : Predicate<"Subtarget->getGeneration() "
  15                       ">= SISubtarget::SOUTHERN_ISLANDS">,
  16             AssemblerPredicate<"FeatureGCN">;
  17 def isSI : Predicate<"Subtarget->getGeneration() "
  18                       "== SISubtarget::SOUTHERN_ISLANDS">,
  19            AssemblerPredicate<"FeatureSouthernIslands">;
  20
  21 def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
  22 def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
  23 def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
  24                       AssemblerPredicate<"FeatureVGPRIndexMode">;
  25 def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
  26                 AssemblerPredicate<"FeatureMovrel">;
  27
  28 include "VOPInstructions.td"
  29 include "SOPInstructions.td"
  30 include "SMInstructions.td"
  31 include "FLATInstructions.td"
  32 include "BUFInstructions.td"
  33
  34 let SubtargetPredicate = isGCN in {
  35
  36 //===----------------------------------------------------------------------===//
  37 // EXP Instructions
  38 //===----------------------------------------------------------------------===//
  39
  40 defm EXP : EXP_m<0, AMDGPUexport>;
  41 defm EXP_DONE : EXP_m<1, AMDGPUexport_done>;
  42
  43 //===----------------------------------------------------------------------===//
  44 // VINTRP Instructions
  45 //===----------------------------------------------------------------------===//
  46
  47 let Uses = [M0, EXEC] in {
  48
  49 // FIXME: Specify SchedRW for VINTRP instructions.
  50
  51 multiclass V_INTERP_P1_F32_m : VINTRP_m <
  52   0x00000000,
  53   (outs VGPR_32:$vdst),
  54   (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
  55   "v_interp_p1_f32 $vdst, $vsrc, $attr$attrchan",
  56   [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
  57                                                (i32 imm:$attr)))]
  58 >;
  59
  60 let OtherPredicates = [has32BankLDS] in {
  61
  62 defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;
  63
  64 } // End OtherPredicates = [has32BankLDS]
  65
  66 let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $vdst", isAsmParserOnly=1 in {
  67
  68 defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;
  69
  70 } // End OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $vdst", isAsmParserOnly=1
  71
  72 let DisableEncoding = "$src0", Constraints = "$src0 = $vdst" in {
  73
  74 defm V_INTERP_P2_F32 : VINTRP_m <
  75   0x00000001,
  76   (outs VGPR_32:$vdst),
  77   (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
  78   "v_interp_p2_f32 $vdst, $vsrc, $attr$attrchan",
  79   [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
  80                                                           (i32 imm:$attr)))]>;
  81
  82 } // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
  83
  84 defm V_INTERP_MOV_F32 : VINTRP_m <
  85   0x00000002,
  86   (outs VGPR_32:$vdst),
  87   (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
  88   "v_interp_mov_f32 $vdst, $vsrc, $attr$attrchan",
  89   [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
  90                                      (i32 imm:$attr)))]>;
  91
  92 } // End Uses = [M0, EXEC]
  93
  94 //===----------------------------------------------------------------------===//
  95 // Pseudo Instructions
  96 //===----------------------------------------------------------------------===//
  97
  98 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
  99
 100 // For use in patterns
 101 def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
 102   (ins VSrc_b64:$src0, VSrc_b64:$src1, SSrc_b64:$src2), "", []> {
 103   let isPseudo = 1;
 104   let isCodeGenOnly = 1;
 105   let usesCustomInserter = 1;
 106 }
 107
 108 // 64-bit vector move instruction.  This is mainly used by the SIFoldOperands
 109 // pass to enable folding of inline immediates.
 110 def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
 111                                       (ins VSrc_b64:$src0)>;
 112 } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
 113
 114 let usesCustomInserter = 1, SALU = 1 in {
 115 def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
 116   [(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
 117 } // End let usesCustomInserter = 1, SALU = 1
 118
 119 def S_MOV_B64_term : PseudoInstSI < // SALU pseudo flagged as a block terminator
 120   (outs SReg_64:$dst), (ins SSrc_b64:$src0)> {
 121   let isTerminator = 1;
 122   let isAsCheapAsAMove = 1;
 123   let SALU = 1;
 124 }
 125
 126 def S_XOR_B64_term : PseudoInstSI < // SALU pseudo flagged as a block terminator
 127   (outs SReg_64:$dst), (ins SSrc_b64:$src0, SSrc_b64:$src1)> {
 128   let isTerminator = 1;
 129   let isAsCheapAsAMove = 1;
 130   let SALU = 1;
 131 }
 132
 133 def S_ANDN2_B64_term : PseudoInstSI < // SALU pseudo flagged as a block terminator
 134   (outs SReg_64:$dst), (ins SSrc_b64:$src0, SSrc_b64:$src1)> {
 135   let isTerminator = 1;
 136   let isAsCheapAsAMove = 1;
 137   let SALU = 1;
 138 }
 139
 140 def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
 141   [(int_amdgcn_wave_barrier)]> {
 142   let SchedRW = [];
 143   let hasNoSchedulingInfo = 1;
 144   let hasSideEffects = 1;
 145   let mayLoad = 1;
 146   let mayStore = 1;
 147   let isBarrier = 1;
 148   let isConvergent = 1;
 149 }
 150
 151 // SI pseudo instructions. These are used by the CFG structurizer pass
 152 // and should be lowered to ISA instructions prior to codegen.
 153
 154 // Dummy terminator instruction to use after control flow instructions
 155 // replaced with exec mask operations.
 156 def SI_MASK_BRANCH : PseudoInstSI <
 157   (outs), (ins brtarget:$target)> {
 158   let isBranch = 0;
 159   let isTerminator = 1;
 160   let isBarrier = 0;
 161   let Uses = [EXEC];
 162   let SchedRW = [];
 163   let hasNoSchedulingInfo = 1;
 164 }
 165
 166 let isTerminator = 1 in {
 167
 168 def SI_IF: CFPseudoInstSI <
 169   (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
 170   [(set i64:$dst, (int_amdgcn_if i1:$vcc, bb:$target))], 1, 1> {
 171   let Constraints = "";
 172   let Size = 12;
 173   let mayLoad = 1;
 174   let mayStore = 1;
 175   let hasSideEffects = 1;
 176 }
 177
 178 def SI_ELSE : CFPseudoInstSI <(outs SReg_64:$dst), // no isel pattern here; selected via the int_amdgcn_else Pat
 179   (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
 180   let Constraints = "$src = $dst"; // $dst is tied to $src
 181   let Size = 12;
 182   let mayLoad = 1;
 183   let mayStore = 1;
 184   let hasSideEffects = 1;
 185 }
 186
 187 def SI_LOOP : CFPseudoInstSI <
 188   (outs), (ins SReg_64:$saved, brtarget:$target),
 189   [(int_amdgcn_loop i64:$saved, bb:$target)], 1, 1> {
 190   let Size = 8;
 191   let isBranch = 1;
 192   let hasSideEffects = 1;
 193   let mayLoad = 1;
 194   let mayStore = 1;
 195 }
 196
 197 } // End isTerminator = 1
 198
 199 def SI_END_CF : CFPseudoInstSI <
 200   (outs), (ins SReg_64:$saved),
 201   [(int_amdgcn_end_cf i64:$saved)], 1, 1> {
 202   let Size = 4;
 203   let isAsCheapAsAMove = 1;
 204   let isReMaterializable = 1;
 205   let mayLoad = 1;
 206   let mayStore = 1;
 207   let hasSideEffects = 1;
 208 }
 209
 210 def SI_BREAK : CFPseudoInstSI <
 211   (outs SReg_64:$dst), (ins SReg_64:$src),
 212   [(set i64:$dst, (int_amdgcn_break i64:$src))], 1> {
 213   let Size = 4;
 214   let isAsCheapAsAMove = 1;
 215   let isReMaterializable = 1;
 216 }
 217
 218 def SI_IF_BREAK : CFPseudoInstSI <
 219   (outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
 220   [(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
 221   let Size = 4;
 222   let isAsCheapAsAMove = 1;
 223   let isReMaterializable = 1;
 224 }
 225
 226 def SI_ELSE_BREAK : CFPseudoInstSI <
 227   (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1),
 228   [(set i64:$dst, (int_amdgcn_else_break i64:$src0, i64:$src1))]> {
 229   let Size = 4;
 230   let isAsCheapAsAMove = 1;
 231   let isReMaterializable = 1;
 232 }
 233
 234 let Uses = [EXEC], Defs = [EXEC,VCC] in {
 235 def SI_KILL : PseudoInstSI <
 236   (outs), (ins VSrc_b32:$src),
 237   [(AMDGPUkill i32:$src)]> {
 238   let isConvergent = 1;
 239   let usesCustomInserter = 1;
 240 }
 241
 242 def SI_KILL_TERMINATOR : SPseudoInstSI <
 243   (outs), (ins VSrc_b32:$src)> {
 244   let isTerminator = 1;
 245 }
 246
 247 } // End Uses = [EXEC], Defs = [EXEC,VCC]
 248
 249 // Branch on undef scc. Used to avoid intermediate copy from
 250 // IMPLICIT_DEF to SCC.
 251 def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
 252   let isTerminator = 1;
 253   let usesCustomInserter = 1;
 254 }
 255
 256 def SI_PS_LIVE : PseudoInstSI <
 257   (outs SReg_64:$dst), (ins),
 258   [(set i1:$dst, (int_amdgcn_ps_live))]> {
 259   let SALU = 1;
 260 }
 261
 262 // Used as an isel pseudo to directly emit initialization with an
 263 // s_mov_b32 rather than a copy of another initialized
 264 // register. MachineCSE skips copies, and we don't want to have to
 265 // fold operands before it runs.
 266 def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> {
 267   let Defs = [M0];
 268   let usesCustomInserter = 1;
 269   let isAsCheapAsAMove = 1;
 270   let isReMaterializable = 1;
 271 }
 272
 273 def SI_RETURN : SPseudoInstSI <
 274   (outs), (ins variable_ops), [(AMDGPUreturn)]> {
 275   let isTerminator = 1;
 276   let isBarrier = 1;
 277   let isReturn = 1;
 278   let hasSideEffects = 1;
 279   let hasNoSchedulingInfo = 1;
 280   let DisableWQM = 1;
 281 }
 282
 283 let Defs = [M0, EXEC],
 284   UseNamedOperandTable = 1 in {
 285
 286 class SI_INDIRECT_SRC<RegisterClass rc> : VPseudoInstSI <
 287   (outs VGPR_32:$vdst),
 288   (ins rc:$src, VS_32:$idx, i32imm:$offset)> {
 289   let usesCustomInserter = 1;
 290 }
 291
 292 class SI_INDIRECT_DST<RegisterClass rc> : VPseudoInstSI <
 293   (outs rc:$vdst),
 294   (ins rc:$src, VS_32:$idx, i32imm:$offset, VGPR_32:$val)> {
 295   let Constraints = "$src = $vdst";
 296   let usesCustomInserter = 1;
 297 }
 298
 299 // TODO: We can support indirect SGPR access.
 300 def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
 301 def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
 302 def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
 303 def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;
 304 def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC<VReg_512>;
 305
 306 def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
 307 def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
 308 def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
 309 def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
 310 def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
 311
 312 } // End Defs = [M0, EXEC], UseNamedOperandTable = 1
 313
 314 multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> { // Emits a <NAME>_SAVE / <NAME>_RESTORE pseudo pair for sgpr_class
 315   let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {
 316     def _SAVE : PseudoInstSI < // Spill: takes $data and $addr as inputs, produces no outputs
 317       (outs),
 318       (ins sgpr_class:$data, i32imm:$addr)> {
 319       let mayStore = 1;
 320       let mayLoad = 0;
 321     }
 322
 323     def _RESTORE : PseudoInstSI < // Reload: defines $data from $addr
 324       (outs sgpr_class:$data),
 325       (ins i32imm:$addr)> {
 326       let mayStore = 0;
 327       let mayLoad = 1;
 328     }
 329   } // End UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC]
 330 }
 331
 332 // You cannot use M0 as the output of v_readlane_b32 instructions or
 333 // use it in the sdata operand of SMEM instructions. We still need to
 334 // be able to spill the physical register m0, so allow it for
 335 // SI_SPILL_S32_* instructions.
 336 defm SI_SPILL_S32  : SI_SPILL_SGPR <SReg_32>;
 337 defm SI_SPILL_S64  : SI_SPILL_SGPR <SReg_64>;
 338 defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
 339 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 340 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 341
 342 multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> { // Emits a <NAME>_SAVE / <NAME>_RESTORE pseudo pair for vgpr_class
 343   let UseNamedOperandTable = 1, VGPRSpill = 1,
 344        SchedRW = [WriteVMEM] in {
 345     def _SAVE : VPseudoInstSI < // Spill: $vdata is an input, no outputs
 346       (outs),
 347       (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc,
 348            SReg_32:$soffset, i32imm:$offset)> {
 349       let mayStore = 1;
 350       let mayLoad = 0;
 351       // (2 * 4) + (8 * num_subregs) bytes maximum
 352       let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8); // ((Size >> 5) << 3) + 8; num_subregs = Size / 32 (presumably Size is in bits - confirm)
 353     }
 354
 355     def _RESTORE : VPseudoInstSI < // Reload: $vdata is the output
 356       (outs vgpr_class:$vdata),
 357       (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset,
 358            i32imm:$offset)> {
 359       let mayStore = 0;
 360       let mayLoad = 1;
 361
 362       // (2 * 4) + (8 * num_subregs) bytes maximum
 363       let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8); // same bound as _SAVE
 364     }
 365   } // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
 366 }
 367
 368 defm SI_SPILL_V32  : SI_SPILL_VGPR <VGPR_32>;
 369 defm SI_SPILL_V64  : SI_SPILL_VGPR <VReg_64>;
 370 defm SI_SPILL_V96  : SI_SPILL_VGPR <VReg_96>;
 371 defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
 372 defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
 373 defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
 374
 375 def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
 376   (outs SReg_64:$dst),
 377   (ins si_ga:$ptr_lo, si_ga:$ptr_hi),
 378   [(set SReg_64:$dst,
 379    (i64 (SIpc_add_rel_offset (tglobaladdr:$ptr_lo), (tglobaladdr:$ptr_hi))))]> {
 380   let Defs = [SCC];
 381 }
 382
 383 } // End SubtargetPredicate = isGCN
 384
 385 let Predicates = [isGCN] in {
 386
 387 def : Pat<
 388   (int_amdgcn_else i64:$src, bb:$target),
 389   (SI_ELSE $src, $target, 0)
 390 >;
 391
 392 def : Pat < // Select the operand-less kilp intrinsic as SI_KILL with a constant operand
 393   (int_AMDGPU_kilp),
 394   (SI_KILL (i32 0xbf800000)) // 0xbf800000 is the IEEE-754 single-precision bit pattern of -1.0
 395 >;
 396
 397 //===----------------------------------------------------------------------===//
 398 // VOP1 Patterns
 399 //===----------------------------------------------------------------------===//
 400
 401 let Predicates = [UnsafeFPMath] in {
 402
 403 //def : RcpPat<V_RCP_F64_e32, f64>;
 404 //defm : RsqPat<V_RSQ_F64_e32, f64>;
 405 //defm : RsqPat<V_RSQ_F32_e32, f32>;
 406
 407 def : RsqPat<V_RSQ_F32_e32, f32>;
 408 def : RsqPat<V_RSQ_F64_e32, f64>;
 409
 410 // Convert (x - floor(x)) to fract(x)
 411 def : Pat <
 412   (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
 413              (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
 414   (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
 415 >;
 416
 417 // Convert (x + (-floor(x))) to fract(x)
 418 def : Pat <
 419   (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
 420              (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
 421   (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
 422 >;
 423
 424 } // End Predicates = [UnsafeFPMath]
 425
 426 def : Pat <
 427   (f32 (fpextend f16:$src)),
 428   (V_CVT_F32_F16_e32 $src)
 429 >;
 430
 431 def : Pat <
 432   (f64 (fpextend f16:$src)),
 433   (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))
 434 >;
 435
 436 def : Pat <
 437   (f16 (fpround f32:$src)),
 438   (V_CVT_F16_F32_e32 $src)
 439 >;
 440
 441 def : Pat <
 442   (f16 (fpround f64:$src)),
 443   (V_CVT_F16_F32_e32 (V_CVT_F32_F64_e32 $src))
 444 >;
 445
 446 def : Pat <
 447   (i32 (fp_to_sint f16:$src)),
 448   (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 $src))
 449 >;
 450
 451 def : Pat <
 452   (i32 (fp_to_uint f16:$src)),
 453   (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 $src))
 454 >;
 455
 456 def : Pat <
 457   (f16 (sint_to_fp i32:$src)),
 458   (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 $src))
 459 >;
 460
 461 def : Pat <
 462   (f16 (uint_to_fp i32:$src)),
 463   (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 $src))
 464 >;
 465
 466 //===----------------------------------------------------------------------===//
 467 // VOP2 Patterns
 468 //===----------------------------------------------------------------------===//
 469
 470 multiclass FMADPat <ValueType vt, Instruction inst> {
 471   def : Pat <
 472     (vt (fmad (VOP3NoMods0 vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
 473               (VOP3NoMods  vt:$src1, i32:$src1_modifiers),
 474               (VOP3NoMods  vt:$src2, i32:$src2_modifiers))),
 475     (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
 476           $src2_modifiers, $src2, $clamp, $omod)
 477   >;
 478 }
 479
 480 defm : FMADPat <f16, V_MAC_F16_e64>;
 481 defm : FMADPat <f32, V_MAC_F32_e64>;
 482
 483 multiclass SelectPat <ValueType vt, Instruction inst> {
 484   def : Pat <
 485     (vt (select i1:$src0, vt:$src1, vt:$src2)),
 486     (inst $src2, $src1, $src0)
 487   >;
 488 }
 489
 490 defm : SelectPat <i16, V_CNDMASK_B32_e64>;
 491 defm : SelectPat <i32, V_CNDMASK_B32_e64>;
 492 defm : SelectPat <f16, V_CNDMASK_B32_e64>;
 493 defm : SelectPat <f32, V_CNDMASK_B32_e64>;
 494
 495 def : Pat <
 496   (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
 497   (V_BCNT_U32_B32_e64 $popcnt, $val)
 498 >;
 499
 500 /********** ============================================ **********/
 501 /********** Extraction, Insertion, Building and Casting  **********/
 502 /********** ============================================ **********/
 503
 504 foreach Index = 0-1 in { // One Extract/Insert pattern per valid lane of a 2-element vector (was 0-2, which also emitted lane 2 / sub2)
 505   def Extract_Element_v2i32_#Index : Extract_Element < // arguments: element type, vector type, lane, sub-register index
 506     i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
 507   >;
 508   def Insert_Element_v2i32_#Index : Insert_Element <
 509     i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
 510   >;
 511
 512   def Extract_Element_v2f32_#Index : Extract_Element < // same pair for the f32 element type
 513     f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
 514   >;
 515   def Insert_Element_v2f32_#Index : Insert_Element <
 516     f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
 517   >;
 518 }
 519
 520 foreach Index = 0-3 in {
 521   def Extract_Element_v4i32_#Index : Extract_Element <
 522     i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
 523   >;
 524   def Insert_Element_v4i32_#Index : Insert_Element <
 525     i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
 526   >;
 527
 528   def Extract_Element_v4f32_#Index : Extract_Element <
 529     f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
 530   >;
 531   def Insert_Element_v4f32_#Index : Insert_Element <
 532     f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
 533   >;
 534 }
 535
 536 foreach Index = 0-7 in {
 537   def Extract_Element_v8i32_#Index : Extract_Element <
 538     i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
 539   >;
 540   def Insert_Element_v8i32_#Index : Insert_Element <
 541     i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
 542   >;
 543
 544   def Extract_Element_v8f32_#Index : Extract_Element <
 545     f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
 546   >;
 547   def Insert_Element_v8f32_#Index : Insert_Element <
 548     f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
 549   >;
 550 }
 551
 552 foreach Index = 0-15 in {
 553   def Extract_Element_v16i32_#Index : Extract_Element <
 554     i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
 555   >;
 556   def Insert_Element_v16i32_#Index : Insert_Element <
 557     i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
 558   >;
 559
 560   def Extract_Element_v16f32_#Index : Extract_Element <
 561     f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
 562   >;
 563   def Insert_Element_v16f32_#Index : Insert_Element <
 564     f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
 565   >;
 566 }
 567
 568 // FIXME: Why are only some of these type combinations defined for SReg
 569 // and VReg?
 570 // 16-bit bitcast
 571 def : BitConvert <i16, f16, VGPR_32>;
 572 def : BitConvert <f16, i16, VGPR_32>;
 573 def : BitConvert <i16, f16, SReg_32>;
 574 def : BitConvert <f16, i16, SReg_32>;
 575
 576 // 32-bit bitcast
 577 def : BitConvert <i32, f32, VGPR_32>;
 578 def : BitConvert <f32, i32, VGPR_32>;
 579 def : BitConvert <i32, f32, SReg_32>;
 580 def : BitConvert <f32, i32, SReg_32>;
 581
 582 // 64-bit bitcast
 583 def : BitConvert <i64, f64, VReg_64>;
 584 def : BitConvert <f64, i64, VReg_64>;
 585 def : BitConvert <v2i32, v2f32, VReg_64>;
 586 def : BitConvert <v2f32, v2i32, VReg_64>;
 587 def : BitConvert <i64, v2i32, VReg_64>;
 588 def : BitConvert <v2i32, i64, VReg_64>;
 589 def : BitConvert <i64, v2f32, VReg_64>;
 590 def : BitConvert <v2f32, i64, VReg_64>;
 591 def : BitConvert <f64, v2f32, VReg_64>;
 592 def : BitConvert <v2f32, f64, VReg_64>;
 593 def : BitConvert <f64, v2i32, VReg_64>;
 594 def : BitConvert <v2i32, f64, VReg_64>;
 595 def : BitConvert <v4i32, v4f32, VReg_128>; // NOTE: 128-bit conversion, although listed under the 64-bit heading
 596 def : BitConvert <v4f32, v4i32, VReg_128>; // NOTE: 128-bit conversion, although listed under the 64-bit heading
 597
 598 // 128-bit bitcast
 599 def : BitConvert <v2i64, v4i32, SReg_128>;
 600 def : BitConvert <v4i32, v2i64, SReg_128>;
 601 def : BitConvert <v2f64, v4f32, VReg_128>;
 602 def : BitConvert <v2f64, v4i32, VReg_128>;
 603 def : BitConvert <v4f32, v2f64, VReg_128>;
 604 def : BitConvert <v4i32, v2f64, VReg_128>;
 605 def : BitConvert <v2i64, v2f64, VReg_128>;
 606 def : BitConvert <v2f64, v2i64, VReg_128>;
 607
 608 // 256-bit bitcast
 609 def : BitConvert <v8i32, v8f32, SReg_256>;
 610 def : BitConvert <v8f32, v8i32, SReg_256>;
 611 def : BitConvert <v8i32, v8f32, VReg_256>;
 612 def : BitConvert <v8f32, v8i32, VReg_256>;
 613
 614 // 512-bit bitcast
 615 def : BitConvert <v16i32, v16f32, VReg_512>;
 616 def : BitConvert <v16f32, v16i32, VReg_512>;
 617
 618 /********** =================== **********/
 619 /********** Src & Dst modifiers **********/
 620 /********** =================== **********/
 621
 622 def : Pat <
 623   (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
 624                (f32 FP_ZERO), (f32 FP_ONE)),
 625   (V_ADD_F32_e64 $src0_modifiers, $src0, 0, (i32 0), 1, $omod)
 626 >;
 627
 628 /********** ================================ **********/
 629 /********** Floating point absolute/negative **********/
 630 /********** ================================ **********/
 631
 632 // Prevent expanding both fneg and fabs.
 633
 634 def : Pat <
 635   (fneg (fabs f32:$src)),
 636   (S_OR_B32 $src, (S_MOV_B32(i32 0x80000000))) // Set sign bit
 637 >;
 638
 639 // FIXME: Should use S_OR_B32
 640 def : Pat <
 641   (fneg (fabs f64:$src)),
 642   (REG_SEQUENCE VReg_64,
 643     (i32 (EXTRACT_SUBREG f64:$src, sub0)),
 644     sub0,
 645     (V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
 646                   (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
 647     sub1)
 648 >;
 649
 650 def : Pat <
 651   (fabs f32:$src),
 652   (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x7fffffff)))
 653 >;
 654
 655 def : Pat <
 656   (fneg f32:$src),
 657   (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000)))
 658 >;
 659
 660 def : Pat < // fabs on f64: rebuild the 64-bit value from its two 32-bit halves
 661   (fabs f64:$src),
 662   (REG_SEQUENCE VReg_64,
 663     (i32 (EXTRACT_SUBREG f64:$src, sub0)), // sub0 half is passed through unchanged
 664     sub0,
 665     (V_AND_B32_e64 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
 666                    (V_MOV_B32_e32 (i32 0x7fffffff))), // Clear sign bit.
 667      sub1)
 668 >;
 669
 670 def : Pat <
 671   (fneg f64:$src),
 672   (REG_SEQUENCE VReg_64,
 673     (i32 (EXTRACT_SUBREG f64:$src, sub0)),
 674     sub0,
 675     (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
 676                    (i32 (V_MOV_B32_e32 (i32 0x80000000)))),
 677     sub1)
 678 >;
 679
 680 def : Pat <
 681   (fneg f16:$src),
 682   (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))
 683 >;
 684
 685 def : Pat <
 686   (fabs f16:$src),
 687   (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x00007fff)))
 688 >;
 689
 690 def : Pat <
 691   (fneg (fabs f16:$src)),
 692   (S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
 693 >;
 694
 695 /********** ================== **********/
 696 /********** Immediate Patterns **********/
 697 /********** ================== **********/
 698
 699 def : Pat <
 700   (VGPRImm<(i32 imm)>:$imm),
 701   (V_MOV_B32_e32 imm:$imm)
 702 >;
 703
 704 def : Pat <
 705   (VGPRImm<(f32 fpimm)>:$imm),
 706   (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
 707 >;
 708
 709 def : Pat <
 710   (i32 imm:$imm),
 711   (S_MOV_B32 imm:$imm)
 712 >;
 713
 714 // FIXME: Workaround for ordering issue with peephole optimizer where
 715 // a register class copy interferes with immediate folding.  Should
 716 // use s_mov_b32, which can be shrunk to s_movk_i32
 717 def : Pat <
 718   (VGPRImm<(f16 fpimm)>:$imm),
 719   (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
 720 >;
 721
 722 def : Pat <
 723   (f32 fpimm:$imm),
 724   (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
 725 >;
 726
 727 def : Pat <
 728   (f16 fpimm:$imm),
 729   (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
 730 >;
 731
 732 def : Pat <
 733  (i32 frameindex:$fi),
 734  (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi)))
 735 >;
 736
 737 def : Pat <
 738   (i64 InlineImm<i64>:$imm),
 739   (S_MOV_B64 InlineImm<i64>:$imm)
 740 >;
 741
 742 // XXX - Should this use a s_cmp to set SCC?
 743
 744 // Set to sign-extended 64-bit value (true = -1, false = 0)
 745 def : Pat <
 746   (i1 imm:$imm),
 747   (S_MOV_B64 (i64 (as_i64imm $imm)))
 748 >;
 749
 750 def : Pat <
 751   (f64 InlineFPImm<f64>:$imm),
 752   (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm<f64>:$imm)))
 753 >;
 754
 755 /********** ================== **********/
 756 /********** Intrinsic Patterns **********/
 757 /********** ================== **********/
 758
 759 def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
 760
 761 def : Pat < // Each of the four result lanes (sub0..sub3) comes from a different V_CUBE* instruction fed with $src lanes sub0..sub2
 762   (int_AMDGPU_cube v4f32:$src),
 763   (REG_SEQUENCE VReg_128,
 764     (V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
 765                   0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)),
 766                   0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)),
 767                   0 /* clamp */, 0 /* omod */), sub0,
 768     (V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
 769                   0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
 770                   0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
 771                   0 /* clamp */, 0 /* omod */), sub1,
 772     (V_CUBEMA_F32 0 /* src0_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
 773                   0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
 774                   0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
 775                   0 /* clamp */, 0 /* omod */), sub2,
 776     (V_CUBEID_F32 0 /* src0_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
 777                   0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
 778                   0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
 779                   0 /* clamp */, 0 /* omod */), sub3)
 780 >;
 781
 782 def : Pat <
 783   (i32 (sext i1:$src0)),
 784   (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
 785 >;
 786
 787 class Ext32Pat <SDNode ext> : Pat <
 788   (i32 (ext i1:$src0)),
 789   (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
 790 >;
 791
 792 def : Ext32Pat <zext>;
 793 def : Ext32Pat <anyext>;
 794
 795 // The multiplication scales from [0,1] to the unsigned integer range
 796 def : Pat <
 797   (AMDGPUurecip i32:$src0),
 798   (V_CVT_U32_F32_e32
 799     (V_MUL_F32_e32 (i32 CONST.FP_UINT_MAX_PLUS_1),
 800                    (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
 801 >;
 802
 803 //===----------------------------------------------------------------------===//
 804 // VOP3 Patterns
 805 //===----------------------------------------------------------------------===//
 806
 807 def : IMad24Pat<V_MAD_I32_I24>;
 808 def : UMad24Pat<V_MAD_U32_U24>;
 809
 810 defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
 811 def : ROTRPattern <V_ALIGNBIT_B32>;
 812
 813 /********** ====================== **********/
 814 /**********   Indirect addressing  **********/
 815 /********** ====================== **********/
 816
 817 multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, string VecSize> {
 818   // Extract with offset
 819   def : Pat<
 820     (eltvt (extractelt vt:$src, (MOVRELOffset i32:$idx, (i32 imm:$offset)))),
 821     (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $src, $idx, imm:$offset)
 822   >;
 823
 824   // Insert with offset
 825   def : Pat<
 826     (insertelt vt:$src, eltvt:$val, (MOVRELOffset i32:$idx, (i32 imm:$offset))),
 827     (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $src, $idx, imm:$offset, $val)
 828   >;
 829 }
 830
 831 defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;
 832 defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;
 833 defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;
 834 defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;
 835
 836 defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;
 837 defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;
 838 defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
 839 defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
 840
 841 //===----------------------------------------------------------------------===//
 842 // SAD Patterns
 843 //===----------------------------------------------------------------------===//
 844
 845 def : Pat <
 846   (add (sub_oneuse (umax i32:$src0, i32:$src1),
 847                    (umin i32:$src0, i32:$src1)),
 848        i32:$src2),
 849   (V_SAD_U32 $src0, $src1, $src2)
 850 >;
 851
 852 def : Pat <
 853   (add (select_oneuse (i1 (setugt i32:$src0, i32:$src1)),
 854                       (sub i32:$src0, i32:$src1),
 855                       (sub i32:$src1, i32:$src0)),
 856        i32:$src2),
 857   (V_SAD_U32 $src0, $src1, $src2)
 858 >;
 859
 860 //===----------------------------------------------------------------------===//
 861 // Conversion Patterns
 862 //===----------------------------------------------------------------------===//
 863
 864 def : Pat <(i32 (sext_inreg i32:$src, i1)),
 865   (S_BFE_I32 i32:$src, (i32 0x10000))>; // 0 | 1 << 16
 866
 867 // Handle sext_inreg in i64
 868 def : Pat <
 869   (i64 (sext_inreg i64:$src, i1)),
 870   (S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16
 871 >;
 872
 873 def : Pat <
 874   (i16 (sext_inreg i16:$src, i1)),
 875   (S_BFE_I32 $src, (i32 0x00010000)) // 0 | 1 << 16
 876 >;
 877
 878 def : Pat <
 879   (i16 (sext_inreg i16:$src, i8)),
 880   (S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16
 881 >;
 882
 883 def : Pat <
 884   (i64 (sext_inreg i64:$src, i8)),
 885   (S_BFE_I64 i64:$src, (i32 0x80000)) // 0 | 8 << 16
 886 >;
 887
 888 def : Pat <
 889   (i64 (sext_inreg i64:$src, i16)),
 890   (S_BFE_I64 i64:$src, (i32 0x100000)) // 0 | 16 << 16
 891 >;
 892
 893 def : Pat <
 894   (i64 (sext_inreg i64:$src, i32)),
 895   (S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16
 896 >;
 897
 898 def : Pat <
 899   (i64 (zext i32:$src)),
 900   (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 (i32 0)), sub1)
 901 >;
 902
 903 def : Pat <
 904   (i64 (anyext i32:$src)),
 905   (REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
 906 >;
 907
 908 class ZExt_i64_i1_Pat <SDNode ext> : Pat <
 909   (i64 (ext i1:$src)),
 910     (REG_SEQUENCE VReg_64,
 911       (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
 912       (S_MOV_B32 (i32 0)), sub1)
 913 >;
 914
 915
 916 def : ZExt_i64_i1_Pat<zext>; // zext i1 -> i64
 917 def : ZExt_i64_i1_Pat<anyext>; // anyext i1 -> i64 reuses the same expansion as zext
 918
 919 // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
 920 // REG_SEQUENCE patterns don't support instructions with multiple outputs.
 921 def : Pat <
 922   (i64 (sext i32:$src)), // sext i32 -> i64
 923     (REG_SEQUENCE SReg_64, $src, sub0, // sub0: the original 32-bit value
 924     (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, (i32 31)), SReg_32_XM0)), sub1) // sub1: $src arithmetic-shifted right by 31, copied into SReg_32_XM0
 925 >;
 926
 927 def : Pat <
 928   (i64 (sext i1:$src)), // sext i1 -> i64
 929   (REG_SEQUENCE VReg_64,
 930     (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0, // sub0: chosen between 0 and -1 by $src
 931     (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1) // sub1: the same 0 / -1 selection as sub0
 932 >;
 933
 934 class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : Pat < // fp -> i1 conversion selected to Inst with the constant KOne; instantiations in this file pass V_CMP_EQ_F32_e64 / V_CMP_EQ_F64_e64
 935   (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))), // match: fp_to_int of a `vt` value, capturing its VOP3 source modifiers
 936   (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)) // emit: KOne (typed kone_type) preceded by a literal 0, then $src0 with its modifiers; no clamp, no output modifier
 937 >;
 938
 939 def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>; // f32 -> i1 unsigned: uses CONST.FP32_ONE
 940 def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, i32, f32, fp_to_sint>; // f32 -> i1 signed: uses CONST.FP32_NEG_ONE
 941 def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, i64, f64, fp_to_uint>; // f64 -> i1 unsigned: uses CONST.FP64_ONE
 942 def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, i64, f64, fp_to_sint>; // f64 -> i1 signed: uses CONST.FP64_NEG_ONE
 943
 944 // Logical operations on i1 values are selected to the 64-bit scalar ops
 945 // below. There is only one SCC register, so i1 values are expected to
 946 // come from vector comparisons; those write their result to a pair of
 947 // SGPRs, so they are treated as 64-bit values here. When SGPR copies
 948 // are legalized, instructions whose results would be copied from SCC
 949 // into these operations are moved to the VALU.
 950 def : Pat <
 951   (i1 (and i1:$src0, i1:$src1)),
 952   (S_AND_B64 $src0, $src1)
 953 >;
 954
 955 def : Pat <
 956   (i1 (or i1:$src0, i1:$src1)),
 957   (S_OR_B64 $src0, $src1)
 958 >;
 959
 960 def : Pat <
 961   (i1 (xor i1:$src0, i1:$src1)),
 962   (S_XOR_B64 $src0, $src1)
 963 >;
 964 // i1 -> f32: pick the result directly between 0 and a floating-point constant.
 965 def : Pat <
 966   (f32 (sint_to_fp i1:$src)),
 967   (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src) // signed: 0 or CONST.FP32_NEG_ONE
 968 >;
 969
 970 def : Pat <
 971   (f32 (uint_to_fp i1:$src)),
 972   (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src) // unsigned: 0 or CONST.FP32_ONE
 973 >;
 974 // i1 -> f64: first pick a 32-bit integer (0 / -1 or 0 / 1), then convert it to f64.
 975 def : Pat <
 976   (f64 (sint_to_fp i1:$src)),
 977   (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
 978 >;
 979
 980 def : Pat <
 981   (f64 (uint_to_fp i1:$src)),
 982   (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
 983 >;
 984
 985 //===----------------------------------------------------------------------===//
 986 // Miscellaneous Patterns
 987 //===----------------------------------------------------------------------===//
 988
 989 def : Pat <
 990   (i32 (trunc i64:$a)), // i64 -> i32 truncation
 991   (EXTRACT_SUBREG $a, sub0) // keep only the sub0 half of the 64-bit register
 992 >;
 993 // The three i1 truncations below all compute ($a & 1) == 1 on a 32-bit value.
 994 def : Pat <
 995   (i1 (trunc i32:$a)),
 996   (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
 997 >;
 998
 999 def : Pat <
1000   (i1 (trunc i16:$a)),
1001   (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
1002 >;
1003
1004 def : Pat <
1005   (i1 (trunc i64:$a)),
1006   (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
1007                     (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1)) // i64 source: test only the sub0 half
1008 >;
1009 // NOTE(review): the bswap expansion assumes V_ALIGNBIT_B32 $a, $a, n rotates $a and V_BFI_B32 merges by mask -- confirm against the ISA.
1010 def : Pat <
1011   (i32 (bswap i32:$a)), // 32-bit byte swap
1012   (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)), // mask 0x00ff00ff materialized with S_MOV_B32
1013              (V_ALIGNBIT_B32 $a, $a, (i32 24)), // $a paired with itself, shift amount 24
1014              (V_ALIGNBIT_B32 $a, $a, (i32 8))) // $a paired with itself, shift amount 8
1015 >;
1016
1017 multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> { // vt: value type matched; BFM: instruction selected; MOV: move used to materialize the constant 0
1018   def : Pat <
1019     (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)), // match: ((1 << a) - 1) << b
1020     (BFM $a, $b)
1021   >;
1022
1023   def : Pat <
1024     (vt (add (vt (shl 1, vt:$a)), -1)), // match: (1 << a) - 1, with no outer shift
1025     (BFM $a, (MOV (i32 0))) // second BFM operand is the constant 0 produced by MOV
1026   >;
1027 }
1028
1029 defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>; // 32-bit instantiation of the BFM patterns
1030 // FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
1031
1032 def : BFEPattern <V_BFE_U32, S_MOV_B32>; // BFEPattern is defined outside this chunk
1033 // fcanonicalize is selected to a multiply of $src by CONST.FP*_ONE (presumably the encoding of 1.0; defined outside this chunk).
1034 def : Pat<
1035   (fcanonicalize f16:$src),
1036   (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), 0, $src, 0, 0) // f16 form
1037 >;
1038
1039 def : Pat<
1040   (fcanonicalize f32:$src),
1041   (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0) // f32 form
1042 >;
1043
1044 def : Pat<
1045   (fcanonicalize f64:$src),
1046   (V_MUL_F64 0, CONST.FP64_ONE, 0, $src, 0, 0) // f64 form; the constant is passed without an explicit type wrapper
1047 >;
1048
1049 //===----------------------------------------------------------------------===//
1050 // Fract Patterns
1051 //===----------------------------------------------------------------------===//
1052
1053 let Predicates = [isSI] in {
1054
1055 // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
1056 // used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
1057 // way to implement it is using V_FRACT_F64.
1058 // The workaround for the V_FRACT bug is:
1059 //    fract(x) = isnan(x) ? x : min(V_FRACT(x), 0x3fefffffffffffff)
1060 //    (0x3fefffffffffffff is the largest f64 below 1.0, i.e. 1 - 2^-53)
1061 // Convert floor(x) to (x - fract(x))
1062 def : Pat <
1063   (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))), // match: f64 floor, capturing the source modifiers of x
1064   (V_ADD_F64
1065       $mods,
1066       $x, // first addend: x with its matched source modifiers
1067       SRCMODS.NEG, // second addend is negated, so the add computes x - fract(x)
1068       (V_CNDMASK_B64_PSEUDO
1069          (V_MIN_F64
1070              SRCMODS.NONE,
1071              (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
1072              SRCMODS.NONE,
1073              (V_MOV_B64_PSEUDO 0x3fefffffffffffff), // clamp value: largest f64 below 1.0
1074              DSTCLAMP.NONE, DSTOMOD.NONE),
1075          $x, // alternative operand: x itself, for the isnan(x) case of the workaround formula
1076          (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /*NaN*/))), // select condition: class test of x with mask 3
1077       DSTCLAMP.NONE, DSTOMOD.NONE)
1078 >;
1079
1080 } // End Predicates = [isSI]
1081
1082 //============================================================================//
1083 // Miscellaneous Optimization Patterns
1084 //============================================================================//
1085
1086 def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>; // SHA256MaPattern is defined outside this chunk; instantiated with V_BFI_B32 and V_XOR_B32_e64
1087 // IntMed3Pat is defined outside this chunk; one instantiation uses the signed max/min nodes, the other the unsigned ones.
1088 def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
1089 def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;
1090
1091 //============================================================================//
1092 // Assembler aliases
1093 //============================================================================//
1094
1095 def : MnemonicAlias<"v_add_u32", "v_add_i32">; // assembler accepts v_add_u32 as an alias of v_add_i32
1096 def : MnemonicAlias<"v_sub_u32", "v_sub_i32">; // assembler accepts v_sub_u32 as an alias of v_sub_i32
1097 def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">; // assembler accepts v_subrev_u32 as an alias of v_subrev_i32
1098
1099 } // End isGCN predicate